├── .gitignore
├── .travis.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── VERSION
├── conf.py
├── docs
    ├── CLI.md
    ├── README.md
    ├── assets
    │   ├── ion_client.gif
    │   ├── ion_client_cli.png
    │   ├── ion_client_py.png
    │   ├── ion_server.gif
    │   ├── ion_server_cli.png
    │   ├── ion_server_py.png
    │   ├── logo.png
    │   ├── pom_battle_1.png
    │   ├── pom_battle_2.png
    │   └── pom_env_output.png
    ├── competitions.md
    ├── environment.md
    ├── game_rules.md
    ├── getting_started.md
    ├── index.md
    ├── multiplayer.md
    ├── pommerman.bib
    └── research.md
├── env.yml
├── examples
    ├── docker-agent
    │   ├── Dockerfile
    │   └── run.py
    └── simple_ffa_run.py
├── manager
    ├── app.py
    ├── app.wsgi
    ├── celery_.py
    ├── requirements.txt
    └── run_celery.sh
├── mkdocs.yml
├── notebooks
    └── Playground.ipynb
├── pommerman
    ├── README.md
    ├── __init__.py
    ├── agents
    │   ├── __init__.py
    │   ├── base_agent.py
    │   ├── docker_agent.py
    │   ├── http_agent.py
    │   ├── player_agent.py
    │   ├── player_agent_blocking.py
    │   ├── random_agent.py
    │   ├── simple_agent.py
    │   └── tensorforce_agent.py
    ├── characters.py
    ├── cli
    │   ├── __init__.py
    │   ├── run_battle.py
    │   └── train_with_tensorforce.py
    ├── configs.py
    ├── constants.py
    ├── envs
    │   ├── __init__.py
    │   ├── v0.py
    │   ├── v1.py
    │   └── v2.py
    ├── forward_model.py
    ├── graphics.py
    ├── helpers
    │   └── __init__.py
    ├── network
    │   ├── README.md
    │   ├── __init__.py
    │   ├── client
    │   │   ├── __init__.py
    │   │   ├── constants.py
    │   │   └── network.py
    │   └── server
    │   │   ├── __init__.py
    │   │   ├── constants.py
    │   │   ├── match.py
    │   │   └── network.py
    ├── resources
    │   ├── Agent0-No-Background.png
    │   ├── Agent0-Team-No-Background.png
    │   ├── Agent0-Team.png
    │   ├── Agent0.png
    │   ├── Agent1-No-Background.png
    │   ├── Agent1-Team-No-Background.png
    │   ├── Agent1-Team.png
    │   ├── Agent1.png
    │   ├── Agent2-No-Background.png
    │   ├── Agent2-Team-No-Background.png
    │   ├── Agent2-Team.png
    │   ├── Agent2.png
    │   ├── Agent3-No-Background.png
    │   ├── Agent3-Team-No-Background.png
    │   ├── Agent3-Team.png
    │   ├── Agent3.png
    │   ├── AgentDummy-No-Background.png
    │   ├── AgentDummy.png
    │   ├── Bomb-1.png
    │   ├── Bomb-10.png
    │   ├── Bomb-2.png
    │   ├── Bomb-3.png
    │   ├── Bomb-4.png
    │   ├── Bomb-5.png
    │   ├── Bomb-6.png
    │   ├── Bomb-7.png
    │   ├── Bomb-8.png
    │   ├── Bomb-9.png
    │   ├── Bomb.png
    │   ├── Cousine-Regular.ttf
    │   ├── ExtraBomb.png
    │   ├── Flames.png
    │   ├── Fog.png
    │   ├── IncrRange.png
    │   ├── Kick.png
    │   ├── Passage.png
    │   ├── Rigid.png
    │   ├── Skull.png
    │   ├── Wood.png
    │   └── X-No-Background.png
    ├── runner
    │   ├── __init__.py
    │   └── docker_agent_runner.py
    └── utility.py
├── pylintrc
├── requirements.txt
├── requirements_extra.txt
├── scripts
    ├── build_sample_docker_agent
    └── run_sample_docker_agent
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Created by https://www.gitignore.io/api/python
  3 | 
  4 | ### Python ###
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | .pytest_cache/
 48 | nosetests.xml
 49 | coverage.xml
 50 | *.cover
 51 | .hypothesis/
 52 | 
 53 | # Translations
 54 | *.mo
 55 | *.pot
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule.*
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # Environments
 83 | .env
 84 | .venv
 85 | env/
 86 | venv*/
 87 | ENV/
 88 | env.bak/
 89 | venv.bak/
 90 | 
 91 | # Spyder project settings
 92 | .spyderproject
 93 | .spyproject
 94 | 
 95 | # Rope project settings
 96 | .ropeproject
 97 | 
 98 | # mkdocs documentation
 99 | /site
100 | 
101 | # mypy
102 | .mypy_cache/
103 | 
104 | 
105 | # End of https://www.gitignore.io/api/python
106 | 
107 | #IDEA
108 | .idea
109 | .vscode/
110 | 
111 | # Compiled source #
112 | ###################
113 | *.com
114 | *.class
115 | *.dll
116 | *.exe
117 | *.o
118 | *.so
119 | *.pyc
120 | 
121 | # Packages #
122 | ############
123 | # it's better to unpack these files and commit the raw source
124 | # git has its own built in compression methods
125 | *.7z
126 | *.dmg
127 | *.gz
128 | *.iso
129 | *.jar
130 | *.rar
131 | *.tar
132 | *.zip
133 | 
134 | # Logs and databases #
135 | ######################
136 | *.log
137 | *.sql
138 | *.sqlite
139 | 
140 | # OS generated files #
141 | ######################
142 | .DS_Store
143 | .DS_Store?
144 | ._*
145 | .Spotlight-V100
146 | .Trashes
147 | Icon?
148 | ehthumbs.db
149 | Thumbs.db
150 | 
151 | # NPM and SASS compilation #
152 | #############################
153 | .sass-cache
154 | node_modules
155 | 
156 | # My own #
157 | ######################
158 | *~
159 | *#*
160 | venv
161 | documents
162 | *.p
163 | audio
164 | dump.rdb
165 | local_config.py
166 | *sublime-workspace
167 | *sublime-project
168 | *xcdatamodeld*
169 | *xcodeproject*
170 | concierge.xcodeproj
171 | tmpimg/
172 | local
173 | .eslintrc
174 | 
175 | # Web specific #
176 | ####################
177 | a/static/js/build/*
178 | a/static/css/*
179 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - "3.6"
 4 | 
 5 | script:
 6 |   - # @TODO run tests here
 7 |   - pip install .
 8 |   - "nosetests && pylint -j 0 pommerman"
 9 |   - "nosetests && pylint -j 0 mananger"
10 |   - "nosetests && pylint -j 0 examples"
11 | 
12 | deploy:
13 |   provider: pypi
14 |   user: "cinjon"
15 |   password:
16 |     secure: "WigaDIWWgBGVQ2H2rx7dhLGivPDxLOOjvI+6OPoVgT1Q3/ydloeu9zZWcjEzgP1q9zbajKMFOOdOmO2d/agZpgBY2vDxDfSiPYsasxnxY1ADNP3zpcHO4FMRa6+GZWPKFGjibNTcUgoTsNRxIfPBPGQZpKH33sAnekwlkQIEtOYRFf6gU+4dtn+X0FaoKmwimChnVbiEhPw9YU65/9+3hVsJAtm6m8jxVSr5grN6kQPIK4ItZ2TgRPbTYXAi/t623Gx762emok6UofgdNoobYSmbozFtgG5PC1WcDOnA8nBEyJQOlkXRQvrTQ9THDGa0qhtb6kfNz0zbHe4uQ08QhHZ2EGp1MLnB/pmOhDLS/XKtoPI1mehYApz30ZAsTd+fIwYLUf5eGmiBN5lcJ+unSaY8HwNUD0ZR6YsXLTHJY3LuC9wAUbjlsj/3CkVV7hi/5kzUpg4mkYNw80cffHavWXC1eAw1qC/L6ryjFr1jq2irzIxWKbVEnkpMqGlVom0FrfupsUoSKwPlxEx12QwIzM6qLOKr4CRZzml1sMjsKWF4KpayLCRkWHeMOQh7Tu8P7VhJ7OD/HByrSGb01Sc/KQq89u1xCOb8TEvDvRFXgQZ+i4MIW+uGoW+d66wPwK91m1QwFwmKVNXl4xx9o4K1mf0gOFw9P9RPxgTehAqQPq8="
17 |   on:
18 |     tags: true
19 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 | 
3 | Pommerman is dedicated to providing a harassment-free experience for everyone, regardless of gender, gender identity and expression, sexual orientation, disability, physical appearance, body size, age, race, or religion. We do not tolerate harassment of participants in any form.
4 | 
5 | This code of conduct applies to all Pommerman spaces (including Gist comments) both online and off. Anyone who violates this code of conduct may be sanctioned or expelled from these spaces at the discretion of the Pommerman team.
6 | 
7 | We may add additional rules over time, which will be made clearly available to participants. Participants are responsible for knowing and abiding by these rules.


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributor Guide
 2 | 
 3 | We are excited to have your help building Pommerman. There are many ways to contribute to Pommerman. You can contribute code, write documentation and tutorials, or answer questions in our Discord community. Below is a general overview of contributing. We really look forward to your help!
 4 | 
 5 | ## Code of Conduct
 6 | 
 7 | We strive to foster an open community. Please read our [Code of Conduct](https://github.com/MultiAgentLearning/playground/blob/master/CODE_OF_CONDUCT.md).
 8 | 
 9 | ## How to contribute
10 | 
11 | Below is a list of different ways for you to contribute.
12 | 
13 | * Bugfixes
14 | * New features
15 | * Documentation
16 | * Design
17 | * Tutorials
18 | 
19 | This list isn't complete. We very much welcome other ideas. Please come talk to us in our [Discord](https://discord.gg/wjVJEDc) chat.
20 | 
21 | Here's a list that we see as priorities from the community:
22 | 
23 | 1. Better graphics: We want Pommerman to have a more welcoming feel. Right now, it's just pixels. Even replacing the squares with sprites would be really nice. [Issue 5](https://github.com/MultiAgentLearning/playground/issues/7)
24 | 2. Better (and more) baselines: We released the SimpleAgent as a first baseline to beat before submitting agents to compete. We would like to see more there, each with a degree of difficulty and geared towards the different competitions.
25 | 3. Make tutorials: We plan to make a tutorial for each of the learned Agents that we enter. However, it would be awesome if others did as well. This extends from well-documented algorithms like DQN all the way to less considered ones like Evolutionary Learning.
26 | 
27 | 
28 | ## Contributing Code
29 | 
30 | The general workflow for commiting code.
31 | 
32 | * Fork the repository
33 | * Create a local branch for your fix
34 | * Commit your changes and push your created branch to your fork
35 | * Open a new pull request into our master branch
36 | 
37 | ## Formating
38 | 
39 | **Spacing** - In between methods in classes use one line space. Functions, Classes, and groups of variables outside of a Class use two line spaces.
40 | 
41 | **Naming** - Classes use caps camelcase whereas functions, methods, and variables use snake case. Constants are all caps and use snake case. Names should not exceed 80 characters.
42 | 
43 | **Commenting** - Doc string are required for all files, modules, classes, and functions. Comment complicated code or code that isn't easily understood.
44 | 
45 |  
46 | ## Linting 
47 | 
48 | This project uses pylint to ensure code is formatted correctly. You can lint a module space or a single file by using one of the following terminal commands.
49 | 
50 | ```
51 | # A directory or module
52 | pylint pommerman/
53 | 
54 | # A single file
55 | pylint pommerman/utility.
56 | ```
57 | 
58 | If your code doesn't pass linting please make the updates to ensure your code passes. PR's will not be accepted if your code doesn't pass the linter. 
59 | 
60 | You can dig into the how we lint by taking a look at the `pylintrc` file in the root of this repo.
61 | 
62 | **Linting** - Please lint according to the google style. An easy way to do this is to use the yapf pip package: `yapf --style google <path/to/file>`. Include the flag `-i` to edit the file in place.
63 | 
64 | **Linting** - Please lint according to the google style. An easy way to do this is to use the yapf pip package: `yapf --style google <path/to/file>`. Include the flag `-i` to edit the file in place.
65 | 
66 | ## Discord
67 | 
68 | Discussions, correspondence, and announcements often happen in Discord. You can get access through our [Discord invite.](https://discord.gg/wjVJEDc)
69 | 
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2018 Cinjon Resnick
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Playground
 2 | 
 3 | > First time? check out our [website](https://www.pommerman.com) for more information,
 4 | > our [Discord](https://discordapp.com/invite/wjVJEDc) to join the community,
 5 | > or read the [documentation](./docs) to get started.
 6 | 
 7 | Playground hosts Pommerman, a clone of Bomberman built for AI research. People from around the world submit agents that they've trained to play. We run regular competitions on our servers and report the results and replays.
 8 | 
 9 | There are three variants for which you can enter your agents to compete:
10 | 
11 | * FFA: Free For All where four agents enter and one leaves. It tests planning, tactics, and cunning. The board is fully observable.
12 | * Team (The NIPS '18 Competition environment): 2v2 where two teams of agents enter and one team wins. It tests planning, and tactics, and cooperation. The board is partially observable.
13 | * Team Radio: Like team in that a it's a 2v2 game. Differences are that the agents each have a radio that they can use to convey 2 words from a dictionary of size 8 each step.
14 | 
15 | #### Why should I participate?
16 | 
17 | * You are a machine learning researcher and similarly recognize the lack of approachable benchmarks for this subfield. Help us rectify this and prove that your algorithm is better than others.
18 | * You want to contribute to multi agent or communication research. This is first and foremost a platform for doing research and everything that we do here will eventually get published with generous (or primary) support from us.
19 | * You really like(d) Bomberman and are fascinated by AI. This is a great opportunity to learn how to build intelligent agents.
20 | * You want the glory of winning an AI competition. We are going to publicize the results widely.
21 | * You think AI is dumb and can make a deterministic system that beats any learned agent.
22 | 
23 | #### How do I train agents?
24 | 
25 | Most open-source research tools in this domain have been designed with single agents in mind. We will be developing resources towards standardizing multi-agent learning. In the meantime, we have provided an example training script in train_with_tensorforce.py. It demonstrates how to wrap the Pommerman environments such that they can be trained with popular libraries like TensorForce.
26 | 
27 | #### How do I submit agents that I have trained?
28 | 
29 | The setup for submitting agents will be live shortly. It involves making a [Docker](https://docs.docker.com/get-started/) container that runs your agent. We then read and upload your docker file via Github Deploy Keys. You retain the ownership and license of the agents. We will only look at your code to ensure that it is safe to run, doesn't execute anything malicious, and does not cheat. We are just going to run your agent in competitions on our servers. We have an example agent that already works and further instructions are in the games/a/docker directory.
30 | 
31 | #### Who is running this?
32 | 
33 | [Cinjon Resnick](http://twitter.com/cinjoncin), [Denny Britz](https://twitter.com/dennybritz), [David Ha](https://twitter.com/hardmaru), [Jakob Foerster](https://www.linkedin.com/in/jakobfoerster/), and [Wes Eldridge](https://twitter.com/weseldridge) are the folks behind this. We are generously supported by a host of other people, including [Kyunghyun Cho](https://twitter.com/kchonyc), [Joan Bruna](https://twitter.com/joanbruna), [Julian Togelius](http://julian.togelius.com/) and [Jason Weston](https://research.fb.com/people/weston-jason/). You can find us in the [Discord](https://discordapp.com/invite/wjVJEDc).
34 | 
35 | Pommerman is immensely appreciate of the generous assistance it has received from Jane Street Capital, NVidia, Facebook AI Research, and Google Cloud.
36 | 
37 | #### How can I help?
38 | 
39 | To see the ways you can get invovled with the project head over to our [Contributing Guide](https://github.com/MultiAgentLearning/playground/blob/master/CONTRIBUTING.md) and checkout our current [issues](https://github.com/MultiAgentLearning/playground/issues).
40 | 
41 | # Contributing
42 | 
43 | We welcome contributions through pull request. See [CONTRIBUTING](../master/CONTRIBUTING.md) for more details.
44 | 
45 | # Code of Conduct
46 | 
47 | We strive for an open community. Please read over our [CODE OF CONDUCT](../master/CODE_OF_CONDUCT.md)
48 | 
49 | # Citation
50 | 
51 | If you use the Pommerman environment in your research, please cite us using the [bibtex file](../master/docs/pommerman.bib) in docs.
52 | 


--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 0.2.0
2 | 


--------------------------------------------------------------------------------
/conf.py:
--------------------------------------------------------------------------------
1 | from recommonmark.parser import CommonMarkParser
2 | 
3 | source_parsers = {
4 |     '.md': CommonMarkParser,
5 | }
6 | 
7 | source_suffix = ['.rst', '.md']


--------------------------------------------------------------------------------
/docs/CLI.md:
--------------------------------------------------------------------------------
 1 | # Command-Line Interface
 2 | Pommerman comes with a CLI tool that allows you to quickly launch a game. This can be used to test how well a trained agent plays against other agents.  
 3 | 
 4 | Call this with a config, a game, and a list of agents. The script will start separate threads to operate the agents and then report back the result.  
 5 | 
 6 | An example with all four test agents running FFA:
 7 | ```bash
 8 | pom_battle --agents=test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent --config=PommeFFACompetition-v0
 9 | ```
10 | An example with one player, two random agents, and one test agent:
11 | ```bash
12 | pom_battle --agents=player::arrows,test::agents.SimpleAgent,random::null,random::null --config=PommeFFACompetition-v0
13 | ```
14 | An example with a docker agent:
15 | ```bash
16 | pom_battle --agents=player::arrows,docker::pommerman/test-agent,random::null,random::null --config=PommeFFACompetition-v0
17 | ```
18 | ![pom_battle Output](./assets/pom_battle_1.png)*Output of the 1st example*  
19 | ## Configurations and Options
20 | To get a list of active options you can run `pom_battle --help`. The current list of parameters is:  
21 | * `--game`: Allows you to change the game your agent plays. The default is `pommerman`. Currently only supports `pommerman`  
22 | 
23 | * `--config`: Changes the type of game the agents will play. The default is `PommeFFACompetition-v0`. Other options are `PommeFFACompetition-v0`, `PommeFFAFast-v0`, `PommeFFA-v1`, `PommeRadio-v2`, `PommeTeam-v0`, `PommeTeamFast-v0` and `OneVsOne-v0`.  
24 | 
25 | * `--agents`: Defines the agents participating in the game. The default is 4 simple agents. To changes the agents in the game use a comma delineated list of agent.  
26 | 
27 | * `--agent_env_vars`: Sends enviroment variables to to Docker agents and only Docker agents. The default is "". An example is '0:foo=bar:baz=lar,3:foo=lam', which would send two arguments to Docker Agent 0 and one to Docker Agent 3.  
28 | 
29 | * `--record_pngs_dir`: Defines the directory to record PNGs of the game board for each step. The default is `None`. If the directory doesn't exist, it will be created. The PNGs are saved with the format `%m-%d-%y_%-H-%M-%S_(STEP).png` (`04-17-18_15-54-39_3.png`).  
30 | 
31 | * `--record_json_dir`: Defines the directory to record the JSON representations of the game. The default is `None`. If the directory doesn't exist, it will be created.
32 | 
33 | * `--render`: Allows you to turn of rendering of the game. The default is `False`.  
34 | 
35 | * `--render_mode`: Changes the render mode of the game. The default is `human`. Available options are `human`, `rgb_pixel`, and `rgb_array`.  
36 | 
37 | * `--game_state_file`: Changes the initial state of the game. The file is expected to be in JSON format.  The format of the file is defined below.  
38 |     * `agents`: List of agents serialized (agent_id, is_alive, position, ammo, blast_strength, can_kick)
39 |     * `board`: Board matrix topology (board_size<sup>2</sup>)
40 |     * `board_size`: Board size
41 |     * `bombs`: List of bombs serialized (position, bomber_id, life, blast_strength, moving_direction)
42 |     * `flames`: List of flames serialized (position, life)
43 |     * `items`: List of item by position
44 |     * `step_count`: Step count
45 | 
46 | ![pom_battle Help](./assets/pom_battle_2.png)*Output of help from pom_battle*
47 | ## Training an agent using Tensorforce
48 | Pommerman comes with a trainable agent out of the box. The agent uses a Proximal Policy Optimization (PPO) algorithm. This agent is a good place to start if you want to train your own agent. All of the options that are available in the CLI tool are available in the Tensorforce CLI.    
49 | An example with all three simple agents running FFA:
50 | ```bash
51 | pom_tf_battle --agents=tensorforce::ppo,test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent --config=PommeFFACompetition-v0
52 | ```


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
  1 | # Getting Started
  2 | 
  3 | # Pre-requisites
  4 | 
  5 | * [Python 3.6.0](https://www.python.org/downloads/release/python-360/)+ (including `pip`)
  6 | * [Docker](https://www.docker.com/) (only needed for `DockerAgent`)
  7 | * [virtualenv](https://virtualenv.pypa.io/en/stable/) (optional, for isolated Python environment)
  8 | 
  9 | # Installation
 10 | 
 11 | * Clone the repository
 12 | ```
 13 | $ git clone https://github.com/MultiAgentLearning/playground ~/playground
 14 | ```
 15 | 
 16 | ## Pip
 17 | 
 18 | * **OPTIONAL**: Setup an isolated virtual Python environment by running the following commands
 19 | ```
 20 | $ virtualenv ~/venv
 21 | ```
 22 | This environment needs to be activated for usage. Any package installations will now persist
 23 | in this virtual environment folder only.
 24 | ```
 25 | source ~/venv/bin/activate
 26 | ```
 27 | 
 28 | * Install the `pommerman` package. This needs to be done every time the code is updated to get the
 29 | latest modules
 30 | ```
 31 | $ cd ~/playground
 32 | $ pip install -U .
 33 | ```
 34 | 
 35 | ## Conda
 36 | 
 37 | * Install the `pommerman` environment.
 38 | ```
 39 | $ cd ~/playground
 40 | $ conda env create -f env.yml
 41 | $ conda activate pommerman
 42 | ```
 43 | 
 44 | * To update the environment
 45 | ```
 46 | $ conda env update -f env.yml --prune
 47 | ```
 48 | 
 49 | # Examples
 50 | 
 51 | ## A Simple Example
 52 | 
 53 | The [simple_ffa_run.py](../examples/simple_ffa_run.py) runs a sample Free-For-All game with two
 54 | [SimpleAgent](../pommerman/agents/simple_agent.py)s and two [RandomAgent](../pommerman/agents/random_agent.py)s
 55 | on the board.
 56 | 
 57 | ## Using A Docker Agent
 58 | 
 59 | The above example can be extended to use [DockerAgent](../pommerman/agents/docker_agent.py) instead of a
 60 | [RandomAgent](../pommerman/agents/random_agent.py). [examples/docker-agent](../examples/docker-agent) contains
 61 | the code to wrap a [SimpleAgent](../pommerman/agents/simple_agent.py) inside Docker.
 62 | 
 63 | 
 64 | * We will build a docker image with the name "pommerman/simple-agent" using the `Dockerfile` provided.
 65 | ```
 66 | $ cd ~/playground
 67 | $ docker build -t pommerman/simple-agent -f examples/docker-agent/Dockerfile .
 68 | ```
 69 | 
 70 | * The agent list seen in the previous example can now be updated. Note that a `port` argument (of an unoccupied port) is
 71 | needed to expose the HTTP server.
 72 | ```python
 73 | agent_list = [
 74 |     agents.SimpleAgent(),
 75 |     agents.RandomAgent(),
 76 |     agents.SimpleAgent(),
 77 |     agents.DockerAgent("pommerman/simple-agent", port=12345)
 78 | ]
 79 | ```
 80 | 
 81 | ## Playing an interactive game
 82 | 
 83 | You can also play the game! See below for an example where one [PlayerAgent](../pommerman/agents/player_agent.py)
 84 | controls with the `arrow` keys and the other with the `wasd` keys.
 85 | 
 86 | 
 87 | ```python
 88 | agent_list = [
 89 |     agents.SimpleAgent(),
 90 |     agents.PlayerAgent(agent_control="arrows"), # arrows to move, space to lay bomb
 91 |     agents.SimpleAgent(),
 92 |     agents.PlayerAgent(agent_control="wasd"), # W,A,S,D to move, E to lay bomb
 93 | ]
 94 | ```
 95 | 
 96 | ## Submitting an Agent.
 97 | 
 98 | In order to submit an agent, you need to create an account at 
 99 | [pommerman.com](https://pommerman.com). You can do this by registering with your 
100 | email address or logging in with your Github account.
101 | 
102 | Once you have created an account, login and navigate to your profile - 
103 | [Pommerman profile](https://pommerman.com/me). To submit an agent, fill in the 
104 | form with your agent's name, an ssh git url, and the path to your agent's Docker 
105 | file from the github repository's top level directory. Please make sure that 
106 | your docker file builds properly beforehand.
107 | 
108 | Next, you will need to add an ssh deploy key to your account so we can access 
109 | your agent's repo. This is provided to you along with instructions after 
110 | registering the agent.
111 | 
112 | Before doing all of this, note that we use Docker to run the agents. The best example for making a Docker agent is in the repo in the examples/docker-agent directory. This *must* work in order to properly enter an agent, and we suggest using the accompanying pom_battle cli command (or equivalently run_battle.py) to test out your Docker implementation. If you are having trouble still, feel free to ask questions on our Discord channel.
113 | 
114 | ## NIPS Competition Information:
115 | 
116 | Each competitor will submit two agents that will be teamed together. These agents can be the same one and can be in the same repository even, but we expect there to be two submissions for each entrant. We additionally expect there to be notable differences among the submissions. Similarly to the June 3rd competition, we will examine the code before running it on our servers and collusion will not be tolerated.
117 | 
118 | The competition will be held live at NIPS 2018 in Montreal. We would prefer it if serious entrants were there, but that is not a requirement.
119 | 
120 | ## Actually Getting Started
121 | 
122 | Here is some information that may help you more quickly develop successful agents:
123 | 
124 | 1. Two agents cannot move to the same cell. They will bounce back to their prior places if they try. The same applies to bombs. If an agent and a bomb both try to move to the same space, then the agent will succeed but the bomb will bounce back.
125 | 2. If an agent with the can_kick ability moves to a cell with a bomb, then the bomb is kicked in the direction from which the agent came. The ensuing motion will persist until the bomb hits a wall, another agent, or the edge of the grid. 
126 | 3. When a bomb explodes, it immediately reaches its full blast radius. If there is an agent or a wall in the way, then it prematurely ends and destroys that agent or wall. 
127 | 4. If a bomb is in the vicinity of an explosion, then it will also go off. In this way, bombs can chain together.
128 | 5. The SimpleAgent is very useful as a barometer for your own efforts. Four SimpleAgents playing against each other have a win rate of ~18% each with the remaining ~28% of the time being a tie. Keep in mind that it _can_ destroy itself. That can skew your own results if not properly understood.
129 | 


--------------------------------------------------------------------------------
/docs/assets/ion_client.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/ion_client.gif


--------------------------------------------------------------------------------
/docs/assets/ion_client_cli.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/ion_client_cli.png


--------------------------------------------------------------------------------
/docs/assets/ion_client_py.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/ion_client_py.png


--------------------------------------------------------------------------------
/docs/assets/ion_server.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/ion_server.gif


--------------------------------------------------------------------------------
/docs/assets/ion_server_cli.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/ion_server_cli.png


--------------------------------------------------------------------------------
/docs/assets/ion_server_py.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/ion_server_py.png


--------------------------------------------------------------------------------
/docs/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/logo.png


--------------------------------------------------------------------------------
/docs/assets/pom_battle_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/pom_battle_1.png


--------------------------------------------------------------------------------
/docs/assets/pom_battle_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/pom_battle_2.png


--------------------------------------------------------------------------------
/docs/assets/pom_env_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/pom_env_output.png


--------------------------------------------------------------------------------
/docs/competitions.md:
--------------------------------------------------------------------------------
 1 | # Competitions
 2 | ## Submitting an Agent
 3 | In order to submit an agent, you need to create an account at [Pommerman's website](https://pommerman.com). You can do this by registering with your email address or logging in with your GitHub account.  
 4 | Once you have created an account, login and navigate to your profile - [Pommerman profile](https://pommerman.com/me). To submit an agent, fill in the form with your agent's name, an SSH GIT url, and the path to your agent's Docker file from the GitHub repository's top level directory. Please make sure that your Docker file builds properly beforehand.  
 5 | Next, you will need to add an SSH deploy key to your account so we can access your agent's repository. This is provided to you along with instructions after registering the agent.  
 6 | Before doing all of this, note that we use Docker to run the agents. The best example for making a Docker agent is in the repo in the `examples/docker-agent` directory. This *must* work in order to properly enter an agent, and we suggest using the accompanying `pom_battle_cli` command to test out your Docker implementation. If you are having trouble still, feel free to ask questions on our [Discord channel](index.md#How%20do%20I%20get%20help%20%3F/).  
 7 | ## Rules and Submission
 8 | 1. Each submission should have a Docker file per agent. For FFA and Team Random, there is one agent; For Team Radio, there will be two agents. Instructions and an example for building Docker containers from trained agents can be found in our repository.
 9 | 2. The positions for each agent will be randomized modulo that each agent's position will be opposite from its teammate's position.
10 | 3. The agents should follow the prescribed convention specified in our example code and expose an "act" endpoint that accepts a dictionary of observations. Because we are using Docker containers and http requests, we do not have any requirements for programming language or framework. There will be ample opportunity to test this on our servers beforehand.
11 | 4. If an agent has a bug in its software that causes its container to crash, that will count as a loss for that agent's team.
12 | 5. The expected response from the agent will be a single integer in [0, 5] representing which of the six actions that agent would like to take, as well as two more integers in [1, 8. representing the message if applicable.
13 | 6. If an agent does not respond in an appropriate time limit (100ms), then we will automatically issue them the Stop action and have them send out the message (0, 0) if applicable.
14 | 7. The game setup as described does not allow for the agents to share a centralized controller. If, however, some clever participant figured out a way to force this, they will be subsequently disqualified.
15 | 8. Agents submitted by organizers can participate in the competitions but are not eligible for prizes. They will be excluded from consideration in the final standings.
16 | 9. Competitions will run according to a double elimination style with two brackets. Each battle will be best of three, with the winner moving on and the loser suffering a defeat. Any draws will be replayed. At the end, we will have a clear top four.
17 | ## NIPS 2018
18 | Each competitor will submit two agents that will be teamed together. These agents can be the same one and can be in the same repository even, but we expect there to be two submissions for each entrant. We additionally expect there to be notable differences among the submissions. Similarly to the June 3rd competition, we will examine the code before running it on our servers and collusion will not be tolerated.  
19 | The competition will be held live at NIPS 2018 in Montreal. We would prefer it if serious entrants were there, but that is not a requirement.


--------------------------------------------------------------------------------
/docs/environment.md:
--------------------------------------------------------------------------------
 1 | # Environment Reference
 2 | ## Parameters
 3 | These are parameters for `pommerman.make`:
 4 | ### Configurations
 5 | Configurations that are included in Pommerman. This is the `config_id` parameter of `pommerman.make` as a String:  
 6 | 
 7 | * `PommeFFACompetition-v0` - In this all agents are against each other  
 8 | * `PommeTeamCompetition-v0` - In this teams of two agents each are set against each other. The teams are [Agent0, Agent2] and [Agent1, Agent3]  
 9 | * `OneVsOne-v0` - In this two agents are against each other  
10 | * `PommeFFA-v1` - In this all agents are against each other while the board collapses uniformly by replacing the outermost square with walls  
11 | * `PommeTeamCompetition-v1` - This is similar to PommeFFA-v1 but with Teams instead of being Free-For-All  
12 | * `PommeRadio-v2` - This is similar to `PommeTeamCompetition-v0` but the agents can send a list of two integers in the range [1, 8] to their teammates on every turn.
13 | 
14 | ### Agents
15 | Agent classes that are included in Pommerman. This is the `agent_list` parameter of `pommerman.make` as a list of 4 agent classes. All of these can be found under `pommerman.agents`:  
16 | 
17 | * `base_agent` - This is the class that all agents inherit from   
18 | * `random_agent` - This randomly selects an action and plays it out  
19 | * `simple_agent` - This is an agent based on a non-ML approach (This agent is prone to killing itself)   
20 | * `tensorforce_agent` - This agent calls [TensorForce](https://github.com/reinforceio/tensorforce) to return an action  
21 | * `player_agent` - This is an agent controlled by an keyboard. You can change the control scheme by feeding the `agent_control` parameter as either: `"arrows"` for Arrows = Move and Space = Bomb or `"wasd"` for W,A,S,D = Move, E = Bomb  
22 | * `http_agent` - This agent outputs to accepts input in the form of a REST requests to it  
23 | * `docker_agent` - This agent outputs and accepts inputs to an agent wrapped inside a Docker container  
24 | ## Output
25 | ![Pommerman-enviroment Output](./assets/pom_env_output.png) *This is the output from env.step()*  
26 | 
27 | It has the following format:
28 | 
29 | * Board: The 11x11 board is a numpy array where each value corresponds to one of the representations below. The first element in this 2D array corresponds to the configuration of topmost row of the board and so on.
30 |     * Passage = 0
31 |     * Wooden Wall = 1
32 |     * Rigid Wall = 2
33 |     * Bomb = 3
34 |     * Flames = 4
35 |     * Fog = 5: This is only applicable in the partially observed (2v2 Team Radio) setting.
36 |     * Extra Bomb Power-Up = 6: adds ammo.
37 |     * Increase Range Power-Up = 7: increases the blast_strength
38 |     * Can Kick Power-Up = 8: can kick bombs by touching them.
39 |     * AgentDummy = 9
40 |     * Agent0 = 10
41 |     * Agent1 = 11
42 |     * Agent2 = 12
43 |     * Agent3 = 13
44 | * Position: A tuple of Ints of (X position, Y position)
45 | * Ammo: An Int representing the amount of ammo this agent has. 
46 | * Blast Strength: An Int representing the blast strength of this agent's bombs.
47 | * Can Kick: Whether this agent can kick bombs. This ability is gained by stepping on the can kick power-up.
48 | * Teammate: One Int in [9, 13].  Which agent is this agent's teammate. In the FFA game, this is the AgentDummy.
49 | * Enemies: A list of three Ints, each in [9, 13]. Which agents are this agent's enemies. There are three here to be amenable to all variants of the game. When there are only two enemies like in the team competitions, the last Int will be the AgentDummy to reflect the fact that there are only two enemies.
50 | * Bomb Blast Strength: An 11x11 numpy int array representing the bombs' blast strengths in the agent's view. Everything outside of its view will be fogged out.
51 | * Bomb Life: An 11x11 numpy int array representing the bombs' life in the agent's view. Everything outside of its view will be fogged out.
52 | * Bomb Movement Direction: An 11x11 numpy int array representing the bombs' movement direction (in terms of an agent's action space: 1 -> up, 2 -> down etc...) in the agent's view. Everything outside of its view will be fogged out.
53 | * Flame Life: An 11x11 numpy int array representing the flames' life in the agent's view. Everything outside of its view will be fogged out.
54 | * Message: (Team Radio only) A list of two Ints, each in [0, 8]. The message being relayed from the teammate. Both ints are zero when a teammate is dead or it's the first step. Otherwise they are in [1, 8].
55 | 


--------------------------------------------------------------------------------
/docs/game_rules.md:
--------------------------------------------------------------------------------
 1 | # Rules and Submission
 2 | 
 3 | 1) Each submission should have a Docker file per agent. For FFA and Team Random, there is one agent; For Team Radio, there will be two agents. Instructions and an example for building Docker containers from trained agents can be found in our repository.
 4 | 
 5 | 2) The positions for each agent will be randomized modulo that each agent's position will be opposite from its teammate's position.
 6 | 
 7 | 3) The agents should follow the prescribed convention specified in our example code and expose an "act" endpoint that accepts a dictionary of observations. Because we are using Docker containers and http requests, we do not have any requirements for programming language or framework. There will be ample opportunity to test this on our servers beforehand.
 8 | 
 9 | 4) If an agent has a bug in its software that causes its container to crash, that will count as a loss for that agent's team.
10 | 
11 | 5) The expected response from the agent will be a single integer in [0, 5] representing which of the six actions that agent would like to take, as well as two more integers in [1, 8] representing the message if applicable.
12 | 
13 | 6) If an agent does not respond in an appropriate time limit (100ms), then we will automatically issue them the Stop action and have them send out the message (0, 0) if applicable.
14 | 
15 | 7) The game setup as described does not allow for the agents to share a centralized controller. If, however, some clever participant figured out a way to force this, they will be subsequently disqualified.
16 | 
17 | 8) Agents submitted by organizers can participate in the competitions but are not eligible for prizes. They will be excluded from consideration in the final standings.
18 | 
19 | 9) Competitions will run according to a double elimination style with two brackets. Each battle will be best of three, with the winner moving on and the loser suffering a defeat. Any draws will be replayed. At the end, we will have a clear top four.
20 | 


--------------------------------------------------------------------------------
/docs/getting_started.md:
--------------------------------------------------------------------------------
  1 | # Getting Started
  2 | ## Pre-requisites
  3 | * [Python 3.6.0](https://www.python.org/downloads/release/python-360/)+ (including `pip`)
  4 | * [Docker](https://www.docker.com/) (only needed for `DockerAgent`)
  5 | * [virtualenv](https://virtualenv.pypa.io/en/stable/) (optional, for isolated Python environment)
  6 | ## Installation
  7 | * Clone the repository
  8 | ```
  9 | $ git clone https://github.com/MultiAgentLearning/playground ~/playground
 10 | ```
 11 | ## Pip
 12 | * **OPTIONAL**: Setup an isolated virtual Python environment by running the following commands
 13 | ```
 14 | $ virtualenv ~/venv
 15 | ```
 16 | This environment needs to be activated for usage. Any package installations will now persist
 17 | in this virtual environment folder only.
 18 | ```
 19 | source ~/venv/bin/activate
 20 | ```
 21 | * Install the `pommerman` package. This needs to be done every time the code is updated to get the
 22 | latest modules
 23 | ```
 24 | $ cd ~/playground
 25 | $ pip install -U .
 26 | ```
 27 | ## Conda
 28 | * Install the `pommerman` environment.
 29 | ```
 30 | $ cd ~/playground
 31 | $ conda env create -f env.yml
 32 | $ conda activate pommerman
 33 | ```
 34 | * To update the environment
 35 | ```
 36 | $ conda env update -f env.yml --prune
 37 | ```
 38 | ## Examples
 39 | ### Free-For-All
 40 | The code below runs a sample Free-For-All game with two **SimpleAgent**'s and two **RandomAgent**'s on the board.  
 41 | ```python
 42 | #!/usr/bin/python
 43 | """A simple Free-For-All game with Pommerman."""
 44 | import pommerman
 45 | from pommerman import agents
 46 | 
 47 | 
 48 | def main():
 49 |     """Simple function to bootstrap a game"""
 50 |     # Print all possible environments in the Pommerman registry
 51 |     print(pommerman.REGISTRY)
 52 | 
 53 |     # Create a set of agents (exactly four)
 54 |     agent_list = [
 55 |         agents.SimpleAgent(),
 56 |         agents.RandomAgent(),
 57 |         agents.SimpleAgent(),
 58 |         agents.RandomAgent(),
 59 |         # agents.DockerAgent("pommerman/simple-agent", port=12345),
 60 |     ]
 61 |     # Make the "Free-For-All" environment using the agent list
 62 |     env = pommerman.make('PommeFFACompetition-v0', agent_list)
 63 | 
 64 |     # Run the episodes just like OpenAI Gym
 65 |     for i_episode in range(1):
 66 |         state = env.reset()
 67 |         done = False
 68 |         while not done:
 69 |             env.render()
 70 |             actions = env.act(state)
 71 |             state, reward, done, info = env.step(actions)
 72 |         print('Episode {} finished'.format(i_episode))
 73 |     env.close()
 74 | 
 75 | 
 76 | if __name__ == '__main__':
 77 |     main()
 78 | ```
 79 | ### Docker Agent
 80 | The above example can be extended to use **DockerAgent** instead of a **RandomAgent**. The code below wraps a **SimpleAgent** inside Docker.  
 81 | ```python
 82 | #!/usr/bin/python
 83 | """Implementation of a simple deterministic agent using Docker."""
 84 | 
 85 | from pommerman import agents
 86 | from pommerman.runner import DockerAgentRunner
 87 | 
 88 | 
 89 | class MyAgent(DockerAgentRunner):
 90 |     """An example Docker agent class"""
 91 | 
 92 |     def __init__(self):
 93 |         self._agent = agents.SimpleAgent()
 94 | 
 95 |     def act(self, observation, action_space):
 96 |         return self._agent.act(observation, action_space)
 97 | 
 98 | 
 99 | def main():
100 |     """Inits and runs a Docker Agent"""
101 |     agent = MyAgent()
102 |     agent.run()
103 | 
104 | 
105 | if __name__ == "__main__":
106 |     main()
107 | ```
108 | * We will build a docker image with the name `pommerman/simple-agent` using the `Dockerfile` provided.
109 | ```shell
110 | $ cd ~/playground
111 | $ docker build -t pommerman/simple-agent -f examples/docker-agent/Dockerfile .
112 | ```
113 | 
114 | * The agent list seen in the previous example can now be updated. Note that a `port` argument (of an unoccupied port) is
115 | needed to expose the HTTP server.
116 | ```python
117 | #!/usr/bin/python
118 | agent_list = [
119 |     agents.SimpleAgent(),
120 |     agents.RandomAgent(),
121 |     agents.SimpleAgent(),
122 |     agents.DockerAgent("pommerman/simple-agent", port=12345)
123 | ]
124 | ```
125 | ## Playing an interactive game
126 | You can also play the game! See below for an example where one **PlayerAgent** controls with the `Arrow` keys and the other with the `WASD` keys.
127 | ```python
128 | #!/usr/bin/python
129 | agent_list = [
130 |     agents.SimpleAgent(),
131 |     agents.PlayerAgent(agent_control="arrows"), # Arrows = Move, Space = Bomb
132 |     agents.SimpleAgent(),
133 |     agents.PlayerAgent(agent_control="wasd"), # W,A,S,D = Move, E = Bomb
134 | ]
135 | ```
136 | 
137 | ## NeurIPS 2018 Docker Agents
138 | 
139 | To test your agent against 2018 NeurIPS competition agents you can download an agent using `docker pull`...
140 | 
141 | ```
142 | docker pull multiagentlearning/hakozakijunctions
143 | ```
144 | 
145 | The following agents are available: `multiagentlearning/hakozakijunctions`, `multiagentlearning/dypm.1`, `multiagentlearning/dypm.2`, `multiagentlearning/navocado`, `multiagentlearning/skynet955`, `multiagentlearning/eisenach`
146 | 
147 | To use an agent once you have pulled it from docker hub use a command like the following.
148 | 
149 | ```
150 | pom_battle --agents=MyAgent,docker::multiagentlearning/navocado,player::arrows,docker::multiagentlearning/eisenach --config=PommeRadioCompetition-v2
151 | ```
152 | 
153 | ## Useful information
154 | 1. Two agents cannot move to the same cell. They will bounce back to their prior places if they try. The same applies to bombs. If an agent and a bomb both try to move to the same space, then the agent will succeed but the bomb will bounce back.
155 | 2. If an agent with the can_kick ability moves to a cell with a bomb, then the bomb is kicked in the direction from which the agent came. The ensuing motion will persist until the bomb hits a wall, another agent, or the edge of the grid. 
156 | 3. When a bomb explodes, it immediately reaches its full blast radius. If there is an agent or a wall in the way, then it prematurely ends and destroys that agent or wall. 
157 | 4. If a bomb is in the vicinity of an explosion, then it will also go off. In this way, bombs can chain together.
158 | 5. The SimpleAgent is very useful as a barometer for your own efforts. Four SimpleAgents playing against each other have a win rate of ~18% each with the remaining ~28% of the time being a tie. Keep in mind that it **can** destroy itself. That can skew your own results if not properly understood.
159 | 


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | # Pommerman
 2 | ![Pommerman](./assets/logo.png)
 3 | ## Introduction
 4 | We are machine learning researchers exploring how to train agents that can operate in environments with other learning agents, both cooperatively and adversarially. Whether you are a student or a well-oiled machine, we want you to help us advance the state of the art by building agents.
 5 | 
 6 | ## How do I get help ?
 7 | Join our Discord chat and check out the Github, which has all the instructions and details for training an agent in the environment and then submitting it to compete on [competitions](competitions.md).  
 8 | 
 9 | * [GitHub](https://github.com/MultiAgentLearning/playground)  
10 | * [Discord](https://discord.gg/wjVJEDc)  
11 |   
12 | ## What's in the docs ?  
13 | * [Getting Started](getting_started.md)  
14 | * [Command Line Interface](CLI.md)  
15 | * [Multiplayer](multiplayer.md)  
16 | * [Competitions](competitions.md)  
17 | * [Environment Reference](environment.md)  
18 | * [Research Topics](research.md)  


--------------------------------------------------------------------------------
/docs/multiplayer.md:
--------------------------------------------------------------------------------
 1 | # Multiplayer
 2 | The multiplayer component of Pommerman has two components a Client and a Server:
 3 | ## Client
 4 | ### CLI
 5 | This is primarily for a single or a few matches. It can be launched using `ion_client`.  
 6 | #### Output
 7 | ![Client CLI](./assets/ion_client_cli.png)
 8 | ### API
 9 | This is for running a sizable amount of matches. It can be accessed by `pommerman.network.ion_client`. 
10 | 
11 | #### Code Example
12 | ```python
13 | #!/usr/bin/python
14 | import pommerman
15 | 
16 | matches=10 # Amount of matches to play
17 | 
18 | net = pommerman.network.client.Network("play.pommerman.com:5050") # This is essentially a single player to the server. If you want to have multiple players use one for each player.
19 | result = {"reward":[], "match_id":[]} # Note: We save match ID so we can view replays later
20 | for i in range(matches):
21 |     reward, match_id = pommerman.network.client.match(network=net, room=False, agent=pommerman.agents.SimpleAgent)
22 |     # or pommerman.network.client.match(network=net, room="someroom", agent=pommerman.agents.SimpleAgent)
23 |     result["reward"].append(reward)
24 |     result["match_id"].append(match_id)
25 | print(f"Average reward: {sum(result['reward'])/len(result['reward'])}")
26 | print("The match IDs of played matches:")
27 | for i in range(matches):
28 |     print(f"{i+1}. {result['match_id'][i]}")
29 | ```
30 | #### Output
31 | ![Client API](./assets/ion_client_py.png)
32 | ## Server
33 | ### CLI
34 | This launches a Server. It can be launched using `ion_server`.  
35 | #### Output
36 | ![Server CLI](./assets/ion_server_cli.png)
37 | ### API
38 | This programatically launches a Server. It can be accessed by `pommerman.network.server`.  
39 | #### Code Example
40 | ```python
41 | #!/usr/bin/python
42 | import pommerman
43 | 
44 | if __name__ == '__main__':
45 |     while True: # Re-run the server incase it crashes (This is not actually recommended because you should investigate the error)
46 |         try:
47 |             pommerman.network.server.run(port=5050, max_players=8, timeout=2, mode="PommeTeamCompetition-v0", ui_en=True)
48 |         except Exception as e:
49 |             print(f"The server has crashed. The exception was {e}")
50 | ```
51 | #### Output
52 | ![Server API](./assets/ion_server_py.png)


--------------------------------------------------------------------------------
/docs/pommerman.bib:
--------------------------------------------------------------------------------
 1 | @article{DBLP:journals/corr/abs-1809-07124,
 2 |   author    = {Cinjon Resnick and
 3 |                Wes Eldridge and
 4 |                David Ha and
 5 |                Denny Britz and
 6 |                Jakob Foerster and
 7 |                Julian Togelius and
 8 |                Kyunghyun Cho and
 9 |                Joan Bruna},
10 |   title     = {Pommerman: {A} Multi-Agent Playground},
11 |   journal   = {CoRR},
12 |   volume    = {abs/1809.07124},
13 |   year      = {2018},
14 |   url       = {http://arxiv.org/abs/1809.07124},
15 |   archivePrefix = {arXiv},
16 |   eprint    = {1809.07124},
17 |   timestamp = {Fri, 05 Oct 2018 11:34:52 +0200},
18 |   biburl    = {https://dblp.org/rec/bib/journals/corr/abs-1809-07124},
19 |   bibsource = {dblp computer science bibliography, https://dblp.org}
20 | }
21 | 
22 | 


--------------------------------------------------------------------------------
/docs/research.md:
--------------------------------------------------------------------------------
 1 | # Research
 2 | 1. Proximal Policy Optimization (PPO) [https://arxiv.org/abs/1707.06347](https://arxiv.org/abs/1707.06347)
 3 | 2. Multi-Agent DDPG [https://github.com/openai/maddpg](https://github.com/openai/maddpg)
 4 | 3. Monte Carlo Tree Search [https://gnunet.org/sites/default/files/Browne%20et%20al%20-%20A%20survey%20of%20MCTS%20methods.pdf](https://gnunet.org/sites/default/files/Browne%20et%20al%20-%20A%20survey%20of%20MCTS%20methods.pdf)
 5 | 4. Monte Carlo Tree Search and Reinforcement Learning [https://www.jair.org/media/5507/live-5507-10333-jair.pdf](https://www.jair.org/media/5507/live-5507-10333-jair.pdf)
 6 | 5. Cooperative Multi-Agent Learning [https://link.springer.com/article/10.1007/s10458-005-2631-2](https://link.springer.com/article/10.1007/s10458-005-2631-2)
 7 | 6. Opponent Modeling in Deep Reinforcement Learning [http://www.umiacs.umd.edu/~hal/docs/daume16opponent.pdf](http://www.umiacs.umd.edu/~hal/docs/daume16opponent.pdf)
 8 | 7. Machine Theory of Mind [https://arxiv.org/pdf/1802.07740.pdf](https://arxiv.org/pdf/1802.07740.pdf)
 9 | 8. Coordinated Multi-Agent Imitation Learning [https://arxiv.org/pdf/1703.03121.pdf](https://arxiv.org/pdf/1703.03121.pdf)
10 | 9. Deep Reinforcement Learning from Self-Play in Imperfect-Information Games [https://arxiv.org/pdf/1603.01121.pdf](https://arxiv.org/pdf/1603.01121.pdf) and[http://proceedings.mlr.press/v37/heinrich15.pdf](http://proceedings.mlr.press/v37/heinrich15.pdf)
11 | 10. Autonomous Agents Modelling Other Agents [http://www.cs.utexas.edu/~pstone/Papers/bib2html-links/AIJ18-Albrecht.pdf](http://www.cs.utexas.edu/~pstone/Papers/bib2html-links/AIJ18-Albrecht.pdf)


--------------------------------------------------------------------------------
/env.yml:
--------------------------------------------------------------------------------
 1 | name: pommerman
 2 | channels:
 3 |   - defaults
 4 |   - conda-forge
 5 | dependencies:
 6 |   - python=3.7.*
 7 |   - pip
 8 |   - docker-py=3.*
 9 |   - scipy=1.*
10 |   - pillow=5.*
11 |   - ruamel.yaml=0.*
12 |   - flask=0.*
13 |   - requests=2.*
14 |   - astroid>=2
15 |   - isort=4.3.*
16 |   - pylint>=2
17 |   - websockets=6.*
18 |   - websocket-client=0.53.*
19 |   - python-rapidjson=0.6.*
20 |   - click=7.0
21 |   - pyglet>=1.2.0
22 |   - jsonschema<3.0.0
23 |   - tabulate
24 |   - unidecode
25 |   - colorama
26 |   - future
27 |   - typed-ast>=1.3.0
28 |   - pip:
29 |     - gym~=0.10.5
30 |     - jsonmerge~=1.5.1
31 |     - python-cli-ui~=0.7.1
32 |     - ./
33 | 


--------------------------------------------------------------------------------
/examples/docker-agent/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.6
 2 | 
 3 | ADD ./examples/docker-agent /agent
 4 | 
 5 | # @TODO to be replaced with `pip install pommerman`
 6 | ADD . /pommerman
 7 | RUN cd /pommerman && pip install .
 8 | # end @TODO
 9 | 
10 | EXPOSE 10080
11 | 
12 | ENV NAME Agent
13 | 
14 | # Run app.py when the container launches
15 | WORKDIR /agent
16 | ENTRYPOINT ["python"]
17 | CMD ["run.py"]
18 | 


--------------------------------------------------------------------------------
/examples/docker-agent/run.py:
--------------------------------------------------------------------------------
 1 | """Implementation of a simple deterministic agent using Docker."""
 2 | 
 3 | from pommerman import agents
 4 | from pommerman.runner import DockerAgentRunner
 5 | 
 6 | 
 7 | class MyAgent(DockerAgentRunner):
 8 |     '''An example Docker agent class'''
 9 | 
10 |     def __init__(self):
11 |         self._agent = agents.SimpleAgent()
12 | 
13 |     def init_agent(self, id, game_type):
14 |         return self._agent.init_agent(id, game_type)
15 | 
16 |     def act(self, observation, action_space):
17 |         return self._agent.act(observation, action_space)
18 | 
19 |     def episode_end(self, reward):
20 |         return self._agent.episode_end(reward)
21 | 
22 |     def shutdown(self):
23 |         return self._agent.shutdown()
24 | 
25 | 
26 | def main():
27 |     '''Inits and runs a Docker Agent'''
28 |     agent = MyAgent()
29 |     agent.run()
30 | 
31 | 
32 | if __name__ == "__main__":
33 |     main()
34 | 


--------------------------------------------------------------------------------
/examples/simple_ffa_run.py:
--------------------------------------------------------------------------------
 1 | '''An example to show how to set up an pommerman game programmatically'''
 2 | import pommerman
 3 | from pommerman import agents
 4 | 
 5 | 
 6 | def main():
 7 |     '''Simple function to bootstrap a game.
 8 |        
 9 |        Use this as an example to set up your training env.
10 |     '''
11 |     # Print all possible environments in the Pommerman registry
12 |     print(pommerman.REGISTRY)
13 | 
14 |     # Create a set of agents (exactly four)
15 |     agent_list = [
16 |         agents.SimpleAgent(),
17 |         agents.RandomAgent(),
18 |         agents.SimpleAgent(),
19 |         agents.RandomAgent(),
20 |         # agents.DockerAgent("pommerman/simple-agent", port=12345),
21 |     ]
22 |     # Make the "Free-For-All" environment using the agent list
23 |     env = pommerman.make('PommeFFACompetition-v0', agent_list)
24 | 
25 |     # Run the episodes just like OpenAI Gym
26 |     for i_episode in range(1):
27 |         state = env.reset()
28 |         done = False
29 |         while not done:
30 |             env.render()
31 |             actions = env.act(state)
32 |             state, reward, done, info = env.step(actions)
33 |         print('Episode {} finished'.format(i_episode))
34 |     env.close()
35 | 
36 | 
37 | if __name__ == '__main__':
38 |     main()
39 | 


--------------------------------------------------------------------------------
/manager/app.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | 
  4 | import celery_ as celery
  5 | import docker
  6 | from flask import Flask, jsonify, request
  7 | import requests
  8 | 
  9 | app = Flask(__name__)
 10 | 
 11 | import pommerman
 12 | 
 13 | 
 14 | # Game Manager and Servers
 15 | @a.app.before_request
 16 | def check_for_access():
 17 |     incoming = request.get_json()
 18 |     if request.path != '/ping':
 19 |         incoming = request.get_json()
 20 |         access = incoming.get('access')
 21 |         game_manager_access = os.getenv('PLAYGROUND_GAME_MANAGER_ACCESS')
 22 |         if not access or access != game_manager_access:
 23 |             return jsonify(received=False, error="Access Denied"), 400
 24 | 
 25 | 
 26 | # To Game Manager and Servers, from Web.
 27 | @app.route('/ping', methods=['GET'])
 28 | def ping():
 29 |     return jsonify(success=True)
 30 | 
 31 | 
 32 | # To Game Manager, from Web.
 33 | @app.route('/test', methods=['POST'])
 34 | def test():
 35 |     """Build and run this docker agent locally."""
 36 |     try:
 37 |         incoming = request.get_json()
 38 |         docker_build_path = incoming["docker_build_path"]
 39 |         github_repo = incoming["github_repo"]
 40 |         private_key = incoming["private_key"]
 41 |         name = incoming["name"]
 42 |         agent_id = incoming["agent_id"]
 43 |         user = incoming["user"]
 44 |         config = incoming["config"]
 45 |         celery.run_test(docker_build_path, github_repo, private_key, name,
 46 |                         agent_id, user, config)
 47 |         return jsonify(received=True, error="")
 48 |     except Exception as e:
 49 |         return jsonify(received=False, error=e)
 50 | 
 51 | 
 52 | # To Game Manager, from Web.
 53 | @app.route('/request_battle', methods=['POST'])
 54 | def request_battle():
 55 |     """Process a request to do a battle among four agents.
 56 | 
 57 |     This is on the game manager server. The request includes the docker images
 58 |     for each agent, along with their agent id (aid) and the config.
 59 | 
 60 |     The execution order is:
 61 |     1. Tell each of the four servers to pull their given agent's container.
 62 |     2. They'll then send us back container_is_ready notifications.
 63 |     3. When we receive all of those notifications, we'll fire run_battle here.
 64 |     4. The run_battle script will then manage speaking to each of the servers.
 65 |     5. After the game is over, a result will be sent back to the web server.
 66 |     """
 67 |     try:
 68 |         incoming = request.get_json()
 69 |         agents = [{
 70 |             'docker_image':
 71 |             incoming.get('docker_image_agent_%d' % agent_id),
 72 |             'aid':
 73 |             incoming.get('aid_%d' % agent_id),
 74 |             'agent_id':
 75 |             agent_id
 76 |         } for agent_id in range(4)]
 77 |         battle_info = incoming['config']
 78 |         battle_info += '-%d-%d-%d-%d' % [agent['aid'] for agent in agents]
 79 |         success, message = notify_containers(agents, battle_info)
 80 |         if success:
 81 |             return jsonify(success=True, error="")
 82 |         else:
 83 |             return jsonify(success=False, error=message)
 84 |     except Exception as e:
 85 |         return jsonify(success=False, error=e)
 86 | 
 87 | 
 88 | def notify_containers(agents, battle_info):
 89 |     """Tell the servers to pull and start the given containers."""
 90 |     for agent in enumerate(agents):
 91 |         if pommerman.helpers.use_game_servers:
 92 |             server = pommerman.helpers.game_servers[agent['agent_id']]
 93 |         else:
 94 |             server = "http://localhost"
 95 | 
 96 |         port = "8000"
 97 |         url = ':'.join([server, port])
 98 |         request_url = url + "/start_container"
 99 |         # This includes the aid, the docker_image, and the agent_id
100 |         request_json = agent.copy()
101 |         request_json["access"] = os.getenv('PLAYGROUND_GAME_MANAGER_ACCESS')
102 |         request_json["battle_info"] = battle_info
103 |         request_json["url"] = url
104 |         requests.post(request_url, json=request_json)
105 | 
106 | 
107 | # From Game Manager, To Game Servers.
108 | @app.route('/start_container', methods=['POST'])
109 | def start_container():
110 |     """Server endpoint for requests to pull and then start containers."""
111 |     game_manager_url = os.getenv("PLAYGROUND_GAME_MANAGER_SERVER") + ":8000"
112 | 
113 |     try:
114 |         incoming = request.get_json()
115 | 
116 |         # The battle's unique identifier, my url (server:port), and the docker
117 |         # image. I'm going to send these back when I report we're good to go.
118 |         agent_id = incoming["agent_id"]
119 |         battle_info = incoming["battle_info"]
120 |         docker_image = incoming["docker_image"]
121 |         url = incoming["url"]
122 | 
123 |         client = docker.from_env()
124 |         client.login(
125 |             os.getenv("PLAYGROUND_DOCKER_LOGIN"),
126 |             os.getenv("PLAYGROUND_DOCKER_PASSWORD"))
127 |         logging.warn("Pulling the image %s..." % docker_image)
128 |         img = client.images.pull(docker_image, tag="latest")
129 | 
130 |         if img:
131 |             request_url = game_manager_url + "/container_is_ready"
132 |             request_json = {
133 |                 'aid': incoming['aid'],
134 |                 'battle_info': battle_info,
135 |                 'docker_image': docker_image,
136 |                 'agent_id': agent_id
137 |             }
138 |             requests.post(request_url, json=request_json)
139 |         else:
140 |             pass
141 |     except Exception as e:
142 |         print("Failed to pull container: %s" % e)
143 | 
144 | 
145 | # From Game Servers, To Game Manager.
146 | @app.route('/container_is_ready', methods=['POST'])
147 | def container_is_ready():
148 |     """A ready container alert from a server came in. Feed this to celery."""
149 |     try:
150 |         incoming = request.get_json()
151 |         celery_.add_server_ready_notif(incoming)
152 |         return jsonify(success=True, error="")
153 |     except Exception as e:
154 |         return jsonify(success=False, error=e)
155 | 
156 | 
157 | # From Game Manager, To Game Servers.
158 | @app.route('/run_container', methods=['POST'])
159 | def run_container():
160 |     incoming = request.get_json()
161 |     docker_image = incoming['docker_image']
162 |     env_vars = incoming['env_vars']
163 |     port = incoming['port']
164 | 
165 |     client = docker.from_env()
166 |     client.login(
167 |         os.getenv("PLAYGROUND_DOCKER_LOGIN"),
168 |         os.getenv("PLAYGROUND_DOCKER_PASSWORD"))
169 |     container = client.containers.run(
170 |         docker_image,
171 |         detach=True,
172 |         auto_remove=True,
173 |         ports={10080: port},
174 |         environment=env_vars)
175 |     for line in container.logs(stream=True):
176 |         print(line.decode("utf-8").strip())
177 | 
178 | 
179 | if __name__ == '__main__':
180 |     app.run()
181 | 


--------------------------------------------------------------------------------
/manager/app.wsgi:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | 
 4 | path_to_server_files = '/var/www/manager'
 5 | sys.path.insert(0, path_to_server_files)
 6 | activate_this = os.path.join(path_to_server_files, 'venv/bin/activate_this.py')
 7 | with open(activate_this) as file_:
 8 |     exec(file_.read(), dict(__file__=activate_this))
 9 | 
10 | from app import app as application
11 | 


--------------------------------------------------------------------------------
/manager/requirements.txt:
--------------------------------------------------------------------------------
 1 | amqp==2.3.1
 2 | billiard==3.5.0.3
 3 | celery==5.2.2
 4 | certifi==2018.4.16
 5 | chardet==3.0.4
 6 | click==6.7
 7 | docker==3.3.0
 8 | docker-pycreds==0.2.3
 9 | Flask==0.12.4
10 | future==0.16.0
11 | gitdb2==2.0.3
12 | GitPython==2.1.10
13 | gym==0.10.5
14 | idna==2.6
15 | itsdangerous==0.24
16 | Jinja2==2.10
17 | kombu==4.2.0
18 | MarkupSafe==1.0
19 | numpy==1.14.3
20 | Pillow==8.3.2
21 | pommerman==0.2.0
22 | pyglet==1.4.0a1
23 | pytz==2018.4
24 | requests==2.18.4
25 | ruamel.yaml==0.15.37
26 | scipy==1.1.0
27 | six==1.11.0
28 | smmap2==2.0.3
29 | urllib3==1.22
30 | vine==1.1.4
31 | websocket-client==0.48.0
32 | Werkzeug==0.15.3
33 | 


--------------------------------------------------------------------------------
/manager/run_celery.sh:
--------------------------------------------------------------------------------
1 | celery worker -A celery_.celery --loglevel=info
2 | 


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
 1 | site_name: Pommerman
 2 | theme: material
 3 | markdown_extensions:
 4 |   - codehilite
 5 | repo_url: http://github.com/MultiAgentLearning/playground
 6 | site_url: http://pommerman.com/
 7 | site_description: 'Documentation for Pommerman.'
 8 | 
 9 | nav:
10 |   - Home: index.md
11 |   - Getting Started: getting_started.md
12 |   - Command Line Interface: CLI.md
13 |   - Multiplayer: multiplayer.md
14 |   - Competitions: competitions.md
15 |   - Environment Reference: environment.md
16 |   - Research Topics: research.md


--------------------------------------------------------------------------------
/notebooks/Playground.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Pommerman Demo.\n",
  8 |     "\n",
  9 |     "This notebook demonstrates how to train Pommerman agents. Please let us know at support@pommerman.com if you run into any issues."
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 1,
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import os\n",
 19 |     "import sys\n",
 20 |     "import numpy as np\n",
 21 |     "\n",
 22 |     "from pommerman.agents import SimpleAgent, RandomAgent, PlayerAgent, BaseAgent\n",
 23 |     "from pommerman.configs import ffa_v0_fast_env\n",
 24 |     "from pommerman.envs.v0 import Pomme\n",
 25 |     "from pommerman.characters import Bomber\n",
 26 |     "from pommerman import utility"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "markdown",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "# Random agents\n",
 34 |     "\n",
 35 |     "The following codes instantiates the environment with four random agents who take actions until the game is finished. (This will be a quick game.)"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 2,
 41 |    "metadata": {},
 42 |    "outputs": [
 43 |     {
 44 |      "name": "stdout",
 45 |      "output_type": "stream",
 46 |      "text": [
 47 |       "\u001b[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.\u001b[0m\n"
 48 |      ]
 49 |     }
 50 |    ],
 51 |    "source": [
 52 |     "# Instantiate the environment\n",
 53 |     "config = ffa_v0_fast_env()\n",
 54 |     "env = Pomme(**config[\"env_kwargs\"])"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 3,
 60 |    "metadata": {},
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "# Add four random agents\n",
 64 |     "agents = {}\n",
 65 |     "for agent_id in range(4):\n",
 66 |     "    agents[agent_id] = RandomAgent(config[\"agent\"](agent_id, config[\"game_type\"]))\n",
 67 |     "env.set_agents(list(agents.values()))\n",
 68 |     "env.set_init_game_state(None)"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 4,
 74 |    "metadata": {},
 75 |    "outputs": [
 76 |     {
 77 |      "name": "stdout",
 78 |      "output_type": "stream",
 79 |      "text": [
 80 |       "{'result': <Result.Win: 0>, 'winners': [3]}\n"
 81 |      ]
 82 |     }
 83 |    ],
 84 |    "source": [
 85 |     "# Seed and reset the environment\n",
 86 |     "env.seed(0)\n",
 87 |     "obs = env.reset()\n",
 88 |     "\n",
 89 |     "# Run the random agents until we're done\n",
 90 |     "done = False\n",
 91 |     "while not done:\n",
 92 |     "    env.render()\n",
 93 |     "    actions = env.act(obs)\n",
 94 |     "    obs, reward, done, info = env.step(actions)\n",
 95 |     "env.render(close=True)\n",
 96 |     "env.close()\n",
 97 |     "\n",
 98 |     "print(info)"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "markdown",
103 |    "metadata": {},
104 |    "source": [
105 |     "# Human Agents\n",
106 |     "\n",
107 |     "The following code runs the environment with 3 random agents and one agent with human input (use the arrow keys on your keyboard). This can also be called on the command line with:\n",
108 |     "\n",
109 |     "`python run_battle.py --agents=player::arrows,random::null,random::null,random::null --config=PommeFFACompetition-v0`\n",
110 |     "\n",
111 |     "You can also run this with SimpleAgents by executing:\n",
112 |     "\n",
113 |     "`python run_battle.py --agents=player::arrows,test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent --config=PommeFFACompetition-v0`"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 6,
119 |    "metadata": {},
120 |    "outputs": [
121 |     {
122 |      "name": "stdout",
123 |      "output_type": "stream",
124 |      "text": [
125 |       "\u001b[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.\u001b[0m\n"
126 |      ]
127 |     }
128 |    ],
129 |    "source": [
130 |     "# Instantiate the environment\n",
131 |     "config = ffa_v0_fast_env()\n",
132 |     "env = Pomme(**config[\"env_kwargs\"])\n",
133 |     "\n",
134 |     "# Add 3 random agents\n",
135 |     "agents = {}\n",
136 |     "for agent_id in range(3):\n",
137 |     "    agents[agent_id] = RandomAgent(config[\"agent\"](agent_id, config[\"game_type\"]))\n",
138 |     "\n",
139 |     "# Add human agent\n",
140 |     "agents[3] = PlayerAgent(config[\"agent\"](agent_id, config[\"game_type\"]), \"arrows\")\n",
141 |     "\n",
142 |     "env.set_agents(list(agents.values()))\n",
143 |     "env.set_init_game_state(None)"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 7,
149 |    "metadata": {},
150 |    "outputs": [
151 |     {
152 |      "name": "stdout",
153 |      "output_type": "stream",
154 |      "text": [
155 |       "{'result': <Result.Tie: 2>}\n"
156 |      ]
157 |     }
158 |    ],
159 |    "source": [
160 |     "# Seed and reset the environment\n",
161 |     "env.seed(0)\n",
162 |     "obs = env.reset()\n",
163 |     "\n",
164 |     "# Run the agents until we're done\n",
165 |     "done = False\n",
166 |     "while not done:\n",
167 |     "    env.render()\n",
168 |     "    actions = env.act(obs)\n",
169 |     "    obs, reward, done, info = env.step(actions)\n",
170 |     "env.render(close=True)\n",
171 |     "env.close()\n",
172 |     "\n",
173 |     "# Print the result\n",
174 |     "print(info)"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "markdown",
179 |    "metadata": {},
180 |    "source": [
181 |     "# Training an Agent\n",
182 |     "\n",
183 |     "The following code uses Tensorforce to train a PPO agent. This is in the train_with_tensorforce.py module as well."
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": 8,
189 |    "metadata": {},
190 |    "outputs": [],
191 |    "source": [
192 |     "# Make sure you have tensorforce installed: pip install tensorforce\n",
193 |     "from tensorforce.agents import PPOAgent\n",
194 |     "from tensorforce.execution import Runner\n",
195 |     "from tensorforce.contrib.openai_gym import OpenAIGym"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "code",
200 |    "execution_count": 9,
201 |    "metadata": {},
202 |    "outputs": [],
203 |    "source": [
204 |     "def make_np_float(feature):\n",
205 |     "    return np.array(feature).astype(np.float32)\n",
206 |     "\n",
207 |     "def featurize(obs):\n",
208 |     "    board = obs[\"board\"].reshape(-1).astype(np.float32)\n",
209 |     "    bomb_blast_strength = obs[\"bomb_blast_strength\"].reshape(-1).astype(np.float32)\n",
210 |     "    bomb_life = obs[\"bomb_life\"].reshape(-1).astype(np.float32)\n",
211 |     "    position = make_np_float(obs[\"position\"])\n",
212 |     "    ammo = make_np_float([obs[\"ammo\"]])\n",
213 |     "    blast_strength = make_np_float([obs[\"blast_strength\"]])\n",
214 |     "    can_kick = make_np_float([obs[\"can_kick\"]])\n",
215 |     "\n",
216 |     "    teammate = obs[\"teammate\"]\n",
217 |     "    if teammate is not None:\n",
218 |     "        teammate = teammate.value\n",
219 |     "    else:\n",
220 |     "        teammate = -1\n",
221 |     "    teammate = make_np_float([teammate])\n",
222 |     "\n",
223 |     "    enemies = obs[\"enemies\"]\n",
224 |     "    enemies = [e.value for e in enemies]\n",
225 |     "    if len(enemies) < 3:\n",
226 |     "        enemies = enemies + [-1]*(3 - len(enemies))\n",
227 |     "    enemies = make_np_float(enemies)\n",
228 |     "\n",
229 |     "    return np.concatenate((board, bomb_blast_strength, bomb_life, position, ammo, blast_strength, can_kick, teammate, enemies))\n",
230 |     "\n",
231 |     "class TensorforceAgent(BaseAgent):\n",
232 |     "    def act(self, obs, action_space):\n",
233 |     "        pass"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "code",
238 |    "execution_count": 11,
239 |    "metadata": {},
240 |    "outputs": [
241 |     {
242 |      "name": "stdout",
243 |      "output_type": "stream",
244 |      "text": [
245 |       "\u001b[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.\u001b[0m\n",
246 |       "INFO:tensorflow:Graph was finalized.\n",
247 |       "INFO:tensorflow:Running local_init_op.\n",
248 |       "INFO:tensorflow:Done running local_init_op.\n"
249 |      ]
250 |     }
251 |    ],
252 |    "source": [
253 |     "# Instantiate the environment\n",
254 |     "config = ffa_v0_fast_env()\n",
255 |     "env = Pomme(**config[\"env_kwargs\"])\n",
256 |     "env.seed(0)\n",
257 |     "\n",
258 |     "# Create a Proximal Policy Optimization agent\n",
259 |     "agent = PPOAgent(\n",
260 |     "    states=dict(type='float', shape=env.observation_space.shape),\n",
261 |     "    actions=dict(type='int', num_actions=env.action_space.n),\n",
262 |     "    network=[\n",
263 |     "        dict(type='dense', size=64),\n",
264 |     "        dict(type='dense', size=64)\n",
265 |     "    ],\n",
266 |     "    batching_capacity=1000,\n",
267 |     "    step_optimizer=dict(\n",
268 |     "        type='adam',\n",
269 |     "        learning_rate=1e-4\n",
270 |     "    )\n",
271 |     ")\n",
272 |     "\n",
273 |     "# Add 3 random agents\n",
274 |     "agents = []\n",
275 |     "for agent_id in range(3):\n",
276 |     "    agents.append(SimpleAgent(config[\"agent\"](agent_id, config[\"game_type\"])))\n",
277 |     "\n",
278 |     "# Add TensorforceAgent\n",
279 |     "agent_id += 1\n",
280 |     "agents.append(TensorforceAgent(config[\"agent\"](agent_id, config[\"game_type\"])))\n",
281 |     "env.set_agents(agents)\n",
282 |     "env.set_training_agent(agents[-1].agent_id)\n",
283 |     "env.set_init_game_state(None)"
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "code",
288 |    "execution_count": 12,
289 |    "metadata": {},
290 |    "outputs": [],
291 |    "source": [
292 |     "class WrappedEnv(OpenAIGym):    \n",
293 |     "    def __init__(self, gym, visualize=False):\n",
294 |     "        self.gym = gym\n",
295 |     "        self.visualize = visualize\n",
296 |     "    \n",
297 |     "    def execute(self, action):\n",
298 |     "        if self.visualize:\n",
299 |     "            self.gym.render()\n",
300 |     "\n",
301 |     "        actions = self.unflatten_action(action=action)\n",
302 |     "            \n",
303 |     "        obs = self.gym.get_observations()\n",
304 |     "        all_actions = self.gym.act(obs)\n",
305 |     "        all_actions.insert(self.gym.training_agent, actions)\n",
306 |     "        state, reward, terminal, _ = self.gym.step(all_actions)\n",
307 |     "        agent_state = featurize(state[self.gym.training_agent])\n",
308 |     "        agent_reward = reward[self.gym.training_agent]\n",
309 |     "        return agent_state, terminal, agent_reward\n",
310 |     "    \n",
311 |     "    def reset(self):\n",
312 |     "        obs = self.gym.reset()\n",
313 |     "        agent_obs = featurize(obs[3])\n",
314 |     "        return agent_obs"
315 |    ]
316 |   },
317 |   {
318 |    "cell_type": "code",
319 |    "execution_count": 13,
320 |    "metadata": {},
321 |    "outputs": [
322 |     {
323 |      "name": "stdout",
324 |      "output_type": "stream",
325 |      "text": [
326 |       "Stats:  [-1, -1, -1, -1, -1] [15, 15, 27, 32, 26] [2.0443358421325684, 0.7581827640533447, 1.3421897888183594, 1.6136739253997803, 1.2573180198669434]\n"
327 |      ]
328 |     }
329 |    ],
330 |    "source": [
331 |     "# Instantiate and run the environment for 5 episodes.\n",
332 |     "wrapped_env = WrappedEnv(env, True)\n",
333 |     "runner = Runner(agent=agent, environment=wrapped_env)\n",
334 |     "runner.run(episodes=5, max_episode_timesteps=2000)\n",
335 |     "print(\"Stats: \", runner.episode_rewards, runner.episode_timesteps, runner.episode_times)\n",
336 |     "\n",
337 |     "try:\n",
338 |     "    runner.close()\n",
339 |     "except AttributeError as e:\n",
340 |     "    pass"
341 |    ]
342 |   },
343 |   {
344 |    "cell_type": "code",
345 |    "execution_count": null,
346 |    "metadata": {},
347 |    "outputs": [],
348 |    "source": []
349 |   }
350 |  ],
351 |  "metadata": {
352 |   "kernelspec": {
353 |    "display_name": "pommerman",
354 |    "language": "python",
355 |    "name": "pommerman"
356 |   },
357 |   "language_info": {
358 |    "codemirror_mode": {
359 |     "name": "ipython",
360 |     "version": 3
361 |    },
362 |    "file_extension": ".py",
363 |    "mimetype": "text/x-python",
364 |    "name": "python",
365 |    "nbconvert_exporter": "python",
366 |    "pygments_lexer": "ipython3",
367 |    "version": "3.6.5"
368 |   }
369 |  },
370 |  "nbformat": 4,
371 |  "nbformat_minor": 2
372 | }
373 | 


--------------------------------------------------------------------------------
/pommerman/README.md:
--------------------------------------------------------------------------------
 1 | # Pommerman
 2 | 
 3 | ### Game Overview:
 4 | * Pommerman is a play on Bomberman. There are three different variants, each of which follow the same basic idea but have their own distinct flavors:
 5 |   * FFA: Free For All where four agents enter and one leaves. It tests planning, tactics, and cunning. The board is fully observable.
 6 |   * Team (The NIPS '18 Competition environment): 2v2 where two teams of agents enter and one team wins. It tests planning, and tactics, and cooperation. The board is partially observable.
 7 |   * Team Radio: Like team in that a it's a 2v2 game. Differences are that the agents each have a radio that they can use to convey 2 words from a dictionary of size 8 each step.
 8 | 
 9 | ### Directory Overview:
10 | 
11 | * agents: Baseline agents will reside here in addition to being available in the Docker directory. 
12 | * characters.py: Here lies the actors in the game. This includes Agent, Bomb, and Flame.
13 | * configs.py: This configs module contains the setup. Feel free to edit this in your local directory for easy game loading.
14 | * envs (module):
15 |   * utility.py has shared Enums, constants, and common functions to the different environments.
16 |   * v0.py: This environment is the base one that we use. 
17 |   * v1.py: This is a modification of v0.py that collapses the walls in order to end the game more quickly.
18 |   * v2.py: This is a modification of v0.py that adds in communication. It works by having the agents send a message as part of their actions and then includes that message in the next turn of observations.
19 | 
20 | ### Agent Observations:
21 | 
22 | * Each agent sees:
23 |   * Board: The 11x11 board is a numpy array where each value corresponds to one of the representations below. 
24 |     * Passage = 0
25 |     * Rigid Wall = 1
26 |     * Wooden Wall = 2
27 |     * Bomb = 3
28 |     * Flames = 4
29 |     * Fog = 5: This is only applicable in the partially observed (2v2 Team Radio) setting.
30 |     * Extra Bomb Power-Up = 6: adds ammo.
31 |     * Increase Range Power-Up = 7: increases the blast_strength
32 |     * Can Kick Power-Up = 8: can kick bombs by touching them.
33 |     * AgentDummy = 9
34 |     * Agent0 = 10
35 |     * Agent1 = 11
36 |     * Agent2 = 12
37 |     * Agent3 = 13
38 |   * Position: A tuple of Ints of (X position, Y position)
39 |   * Ammo: An Int representing the amount of ammo this agent has. 
40 |   * Blast Strength: An Int representing the blast strength of this agent's bombs.
41 |   * Can Kick: Whether this agent can kick bombs. This ability is gained by stepping on the can kick power-up.
42 |   * Teammate: One Int in [9, 13].  Which agent is this agent's teammate. In the FFA game, this is the AgentDummy.
43 |   * Enemies: A list of three Ints, each in [9, 13]. Which agents are this agent's enemies. There are three here to be amenable to all variants of the game. When there are only two enemies like in the team competitions, the last Int will be the AgentDummy to reflect the fact that there are only two enemies.
44 |   * Bomb Blast Strength: An 11x11 numpy int array representing the bombs' blast strengths in the agent's view. Everything outside of its view will be fogged out.
45 |   * Bomb Life: An 11x11 numpy int array representing the bombs' life in the agent's view. Everything outside of its view will be fogged out.
46 |   * Message: (Team Radio only) A list of two Ints, each in [0, 8]. The message being relayed from the teammate. Both ints are zero when a teammate is dead or it's the first step. Otherwise they are in [1, 8].
47 | 
48 | ### Agent Actions:
49 | 
50 | * Each agent's actions are:
51 |   * Movement: a single integer in [0, 5] representing which of the six actions that agent would like to take of the following
52 |     * Stop (0): This action is a pass.
53 |     * Up (1): Move up on the board.
54 |     * Down (2): Move down on the board.
55 |     * Left (3): Move left on the board.
56 |     * Right (4): Move right on the board.
57 |     * Bomb (5): Lay a bomb.
58 |   * Message: (Team Radio only) A list of two Ints in [1, 8]. These represent the message. 
59 |         
60 | ### Game Rules:
61 | 
62 | * Every battle starts on a randomly drawn symmetric 11x11 grid (`board'). There are four agents, one in each of the corners. An agent's teammate (if applicable) will be on the kitty corner.
63 | * The board is randomly constructed before each battle and, besides the agents, contains wood walls and rigid walls. We guarantee that the agents will have an accessible path to each other, possibly through wooden walls.
64 | * Rigid walls are indestructible and impassable.
65 | * Wooden walls can be destroyed by bombs (see below). Until they are destroyed, they are impassable. After they are destroyed, they become either a passage or a power-up.
66 | * In any given turn, an agent can choose from one of six actions:
67 |   * Stop (0): This action is a pass.
68 |   * Up (1): Move up on the board.
69 |   * Left (2): Move left on the board.
70 |   * Down (3): Move down on the board.
71 |   * Right (4): Move right on the board.
72 |   * Bomb (5): Lay a bomb.
73 |   * If there is communication, each agent additionally emits a message on each turn consisting of two words from a dictionary of size eight. These words will be given to its teammate in the next step as part of the observation.
74 | * The agent starts with one bomb ("ammo"). Every time it lays a bomb, its ammo decreases by one. After that bomb explodes, its ammo will increase by one.
75 | * The agent also has a blast strength (starts at three). Every bomb it lays is imbued with the current blast strength, which is how far in the vertical and horizontal directions that bomb will effect.
76 | * A bomb has a life of 10 time steps. After its life expires, it explodes and any wooden walls, agents, power-ups or other bombs in its range (given by the blast strength) are destroyed.
77 | * Power-Ups: Half of the wooden walls have power-ups hidden underneath them that are revealed when they are destroyed. These are:
78 |   * Extra Bomb: Picking this up increases the agent's ammo by one.
79 |   * Increase Range: Picking this up increases the agent's blast strength by one.
80 |   * Can Kick: Picking this up allows an agent to kick bombs. It does this by running into them. They then travel in the direction that the agent was moving at a speed of one unit per time step until they are impeded either by a player, a bomb, or a wall.
81 | * The game ends when both players on one team have been destroyed. The winning team is the one who has remaining members.
82 | * Ties can happen when the game does not end before the max steps or if both teams' last agents are destroyed on the same turn. If this happens in a competition, we will rerun the game once. If it happens again after that, then we will rerun it with collapsing walls until there is a winner. This is a variant where, after a large number of steps, the game board becomes smaller according to a specified cadence. See v1.py for a working example in the code.
83 | * If an agent does not respond in an appropriate time limit (100ms), then we will automatically issue them the Stop action and have them send out the message (0, 0).        
84 | * The game setup does not allow for the agents to share a centralized controller. If, however, some clever participant figured out a way to force this, they will be subsequently disqualified.
85 | 


--------------------------------------------------------------------------------
/pommerman/__init__.py:
--------------------------------------------------------------------------------
 1 | '''Entry point into the pommerman module'''
 2 | import gym
 3 | import inspect
 4 | from . import agents
 5 | from . import configs
 6 | from . import constants
 7 | from . import forward_model
 8 | from . import helpers
 9 | from . import utility
10 | from . import network
11 | 
12 | gym.logger.set_level(40)
13 | REGISTRY = None
14 | 
15 | 
16 | def _register():
17 |     global REGISTRY
18 |     REGISTRY = []
19 |     for name, f in inspect.getmembers(configs, inspect.isfunction):
20 |         if not name.endswith('_env'):
21 |             continue
22 | 
23 |         config = f()
24 |         gym.envs.registration.register(
25 |             id=config['env_id'],
26 |             entry_point=config['env_entry_point'],
27 |             kwargs=config['env_kwargs']
28 |         )
29 |         REGISTRY.append(config['env_id'])
30 | 
31 | 
32 | # Register environments with gym
33 | _register()
34 | 
35 | def make(config_id, agent_list, game_state_file=None, render_mode='human'):
36 |     '''Makes the pommerman env and registers it with gym'''
37 |     assert config_id in REGISTRY, "Unknown configuration '{}'. " \
38 |         "Possible values: {}".format(config_id, REGISTRY)
39 |     env = gym.make(config_id)
40 | 
41 |     for id_, agent in enumerate(agent_list):
42 |         assert isinstance(agent, agents.BaseAgent)
43 |         # NOTE: This is IMPORTANT so that the agent character is initialized
44 |         agent.init_agent(id_, env.spec._kwargs['game_type'])
45 | 
46 |     env.set_agents(agent_list)
47 |     env.set_init_game_state(game_state_file)
48 |     env.set_render_mode(render_mode)
49 |     return env
50 | 
51 | 
52 | from . import cli
53 | 


--------------------------------------------------------------------------------
/pommerman/agents/__init__.py:
--------------------------------------------------------------------------------
 1 | '''Entry point into the agents module set'''
 2 | from .base_agent import BaseAgent
 3 | from .docker_agent import DockerAgent
 4 | from .http_agent import HttpAgent
 5 | from .player_agent import PlayerAgent
 6 | from .player_agent_blocking import PlayerAgentBlocking
 7 | from .random_agent import RandomAgent
 8 | from .simple_agent import SimpleAgent
 9 | from .tensorforce_agent import TensorForceAgent
10 | 


--------------------------------------------------------------------------------
/pommerman/agents/base_agent.py:
--------------------------------------------------------------------------------
 1 | '''This is the base abstraction for agents in pommerman.
 2 | All agents should inherent from this class'''
 3 | from .. import characters
 4 | 
 5 | 
 6 | class BaseAgent:
 7 |     """Parent abstract Agent."""
 8 | 
 9 |     def __init__(self, character=characters.Bomber):
10 |         self._character = character
11 | 
12 |     def __getattr__(self, attr):
13 |         return getattr(self._character, attr)
14 | 
15 |     def act(self, obs, action_space):
16 |         raise NotImplementedError()
17 | 
18 |     def episode_end(self, reward):
19 |         """This is called at the end of the episode to let the agent know that
20 |         the episode has ended and what is the reward.
21 | 
22 |         Args:
23 |           reward: The single reward scalar to this agent.
24 |         """
25 |         pass
26 | 
27 |     def init_agent(self, id_, game_type):
28 |         self._character = self._character(id_, game_type)
29 | 
30 |     @staticmethod
31 |     def has_user_input():
32 |         return False
33 | 
34 |     def shutdown(self):
35 |         pass
36 | 


--------------------------------------------------------------------------------
/pommerman/agents/docker_agent.py:
--------------------------------------------------------------------------------
  1 | '''An example docker agent.'''
  2 | import json
  3 | import time
  4 | import os
  5 | import threading
  6 | import requests
  7 | import docker
  8 | 
  9 | from . import BaseAgent
 10 | from .. import utility
 11 | from .. import characters
 12 | 
 13 | 
 14 | class DockerAgent(BaseAgent):
 15 |     """The Docker Agent that Connects to a Docker container where the character runs."""
 16 | 
 17 |     def __init__(self,
 18 |                  docker_image,
 19 |                  port,
 20 |                  server='http://localhost',
 21 |                  character=characters.Bomber,
 22 |                  docker_client=None,
 23 |                  env_vars=None):
 24 |         super(DockerAgent, self).__init__(character)
 25 | 
 26 |         self._docker_image = docker_image
 27 |         self._docker_client = docker_client
 28 |         if not self._docker_client:
 29 |             self._docker_client = docker.from_env()
 30 |             self._docker_client.login(
 31 |                 os.getenv("PLAYGROUND_DOCKER_LOGIN"),
 32 |                 os.getenv("PLAYGROUND_DOCKER_PASSWORD"))
 33 | 
 34 |         self._acknowledged = False  # Becomes True when the container is ready.
 35 |         self._server = server
 36 |         self._port = port
 37 |         self._timeout = 32
 38 |         self._container = None
 39 |         self._env_vars = env_vars or {}
 40 |         # Pass env variables starting with DOCKER_AGENT to the container.
 41 |         for key, value in os.environ.items():
 42 |             if not key.startswith("DOCKER_AGENT_"):
 43 |                 continue
 44 |             env_key = key.replace("DOCKER_AGENT_", "")
 45 |             self._env_vars[env_key] = value
 46 | 
 47 |         # Start the docker agent if it is on this computer. Otherwise, it's far
 48 |         # away and we need to tell that server to start it.
 49 |         if 'localhost' in server:
 50 |             container_thread = threading.Thread(
 51 |                 target=self._run_container, daemon=True)
 52 |             container_thread.start()
 53 |             print("Waiting for docker agent at {}:{}...".format(server, port))
 54 |             self._wait_for_docker()
 55 |         else:
 56 |             request_url = "{}:8000/run_container".format(server)
 57 |             request_json = {
 58 |                 'docker_image': self._docker_image,
 59 |                 'env_vars': self._env_vars,
 60 |                 'port': port
 61 |             }
 62 |             requests.post(request_url, json=request_json)
 63 |             waiting_thread = threading.Thread(
 64 |                 target=self._wait_for_docker, daemon=True)
 65 |             waiting_thread.start()
 66 | 
 67 |     def _run_container(self):
 68 |         print("Starting container...")
 69 |         self._container = self._docker_client.containers.run(
 70 |             self._docker_image,
 71 |             detach=True,
 72 |             auto_remove=True,
 73 |             ports={10080: self._port},
 74 |             environment=self._env_vars)
 75 |         for line in self._container.logs(stream=True):
 76 |             print(line.decode("utf-8").strip())
 77 | 
 78 |     def _wait_for_docker(self):
 79 |         """Wait for network service to appear. A timeout of 0 waits forever."""
 80 |         timeout = self._timeout
 81 |         backoff = .25
 82 |         max_backoff = min(timeout, 16)
 83 | 
 84 |         if timeout:
 85 |             # time module is needed to calc timeout shared between two exceptions
 86 |             end = time.time() + timeout
 87 | 
 88 |         while True:
 89 |             try:
 90 |                 now = time.time()
 91 |                 if timeout and end < now:
 92 |                     print("Timed out - %s:%s" % (self._server, self._port))
 93 |                     raise
 94 | 
 95 |                 request_url = '%s:%s/ping' % (self._server, self._port)
 96 |                 req = requests.get(request_url)
 97 |                 self._acknowledged = True
 98 |                 return True
 99 |             except requests.exceptions.ConnectionError as e:
100 |                 print("ConnectionError: ", e)
101 |                 backoff = min(max_backoff, backoff * 2)
102 |                 time.sleep(backoff)
103 |             except requests.exceptions.HTTPError as e:
104 |                 print("HTTPError: ", e)
105 |                 backoff = min(max_backoff, backoff * 2)
106 |                 time.sleep(backoff)
107 |             except docker.errors.APIError as e:
108 |                 print("This is a Docker error. Please fix: ", e)
109 |                 raise
110 | 
111 |     def init_agent(self, id, game_type):
112 |         super(DockerAgent, self).init_agent(id, game_type)
113 |         request_url = "http://localhost:{}/init_agent".format(self._port)
114 |         try:
115 |             req = requests.post(
116 |                 request_url,
117 |                 timeout=0.5,
118 |                 json={
119 |                     "id": json.dumps(id, cls=utility.PommermanJSONEncoder),
120 |                     "game_type": json.dumps(game_type, cls=utility.PommermanJSONEncoder)
121 |                 })
122 |         except requests.exceptions.Timeout as e:
123 |             print('Timeout in init_agent()!')
124 | 
125 |     def act(self, obs, action_space):
126 |         obs_serialized = json.dumps(obs, cls=utility.PommermanJSONEncoder)
127 |         request_url = "http://localhost:{}/action".format(self._port)
128 |         try:
129 |             req = requests.post(
130 |                 request_url,
131 |                 timeout=0.15,
132 |                 json={
133 |                     "obs":
134 |                     obs_serialized,
135 |                     "action_space":
136 |                     json.dumps(action_space, cls=utility.PommermanJSONEncoder)
137 |                 })
138 |             action = req.json()['action']
139 |         except requests.exceptions.Timeout as e:
140 |             print('Timeout!')
141 |             # TODO: Fix this. It's ugly.
142 |             num_actions = len(action_space.shape)
143 |             if num_actions > 1:
144 |                 return [0] * num_actions
145 |             else:
146 |                 return 0
147 |         return action
148 | 
149 |     def episode_end(self, reward):
150 |         request_url = "http://localhost:{}/episode_end".format(self._port)
151 |         try:
152 |             req = requests.post(
153 |                 request_url,
154 |                 timeout=0.5,
155 |                 json={
156 |                     "reward": json.dumps(reward, cls=utility.PommermanJSONEncoder)
157 |                 })
158 |         except requests.exceptions.Timeout as e:
159 |             print('Timeout in episode_end()!')
160 | 
161 |     def shutdown(self):
162 |         request_url = "http://localhost:{}/shutdown".format(self._port)
163 |         try:
164 |             req = requests.post(
165 |                 request_url,
166 |                 timeout=0.5,
167 |                 json={ })
168 |         except requests.exceptions.Timeout as e:
169 |             print('Timeout in shutdown()!')
170 | 
171 |         print("Stopping container..")
172 |         if self._container:
173 |             try:
174 |                 return self._container.remove(force=True)
175 |             except docker.errors.NotFound as e:
176 |                 return True
177 | 


--------------------------------------------------------------------------------
/pommerman/agents/http_agent.py:
--------------------------------------------------------------------------------
  1 | '''The HTTP agent - provides observation using http push to remote
  2 |    agent and expects action in the reply'''
  3 | import json
  4 | import time
  5 | import os
  6 | import threading
  7 | import requests
  8 | 
  9 | from . import BaseAgent
 10 | from .. import utility
 11 | from .. import characters
 12 | 
 13 | 
 14 | class HttpAgent(BaseAgent):
 15 |     """The HTTP Agent that connects to a port with a remote agent where the
 16 |        character runs. It uses the same interface as the docker agent and
 17 |        is useful for debugging."""
 18 | 
 19 |     def __init__(self,
 20 |                  port=8080,
 21 |                  host='localhost',
 22 |                  timeout=120,
 23 |                  character=characters.Bomber):
 24 |         self._port = port
 25 |         self._host = host
 26 |         self._timeout = timeout
 27 |         super(HttpAgent, self).__init__(character)
 28 |         self._wait_for_remote()
 29 | 
 30 |     def _wait_for_remote(self):
 31 |         """Wait for network service to appear. A timeout of 0 waits forever."""
 32 |         timeout = self._timeout
 33 |         backoff = .25
 34 |         max_backoff = min(timeout, 16)
 35 | 
 36 |         if timeout:
 37 |             # time module is needed to calc timeout shared between two exceptions
 38 |             end = time.time() + timeout
 39 | 
 40 |         while True:
 41 |             try:
 42 |                 now = time.time()
 43 |                 if timeout and end < now:
 44 |                     print("Timed out - %s:%s" % (self._host, self._port))
 45 |                     raise
 46 | 
 47 |                 request_url = 'http://%s:%s/ping' % (self._host, self._port)
 48 |                 req = requests.get(request_url)
 49 |                 self._acknowledged = True
 50 |                 return True
 51 |             except requests.exceptions.ConnectionError as e:
 52 |                 print("ConnectionError: ", e)
 53 |                 backoff = min(max_backoff, backoff * 2)
 54 |                 time.sleep(backoff)
 55 |             except requests.exceptions.HTTPError as e:
 56 |                 print("HTTPError: ", e)
 57 |                 backoff = min(max_backoff, backoff * 2)
 58 |                 time.sleep(backoff)
 59 | 
 60 |     def init_agent(self, id, game_type):
 61 |         super(HttpAgent, self).init_agent(id, game_type)
 62 |         request_url = "http://{}:{}/init_agent".format(self._host, self._port)
 63 |         try:
 64 |             req = requests.post(
 65 |                 request_url,
 66 |                 timeout=0.5,
 67 |                 json={
 68 |                     "id": json.dumps(id, cls=utility.PommermanJSONEncoder),
 69 |                     "game_type": json.dumps(game_type, cls=utility.PommermanJSONEncoder)
 70 |                 })
 71 |         except requests.exceptions.Timeout as e:
 72 |             print('Timeout in init_agent()!')
 73 | 
 74 |     def act(self, obs, action_space):
 75 |         obs_serialized = json.dumps(obs, cls=utility.PommermanJSONEncoder)
 76 |         request_url = "http://{}:{}/action".format(self._host, self._port)
 77 |         try:
 78 |             req = requests.post(
 79 |                 request_url,
 80 |                 timeout=0.15,
 81 |                 json={
 82 |                     "obs":
 83 |                     obs_serialized,
 84 |                     "action_space":
 85 |                     json.dumps(action_space, cls=utility.PommermanJSONEncoder)
 86 |                 })
 87 |             action = req.json()['action']
 88 |         except requests.exceptions.Timeout as e:
 89 |             print('Timeout!')
 90 |             # TODO: Fix this. It's ugly.
 91 |             num_actions = len(action_space.shape)
 92 |             if num_actions > 1:
 93 |                 return [0] * num_actions
 94 |             else:
 95 |                 return 0
 96 |         return action
 97 | 
 98 |     def episode_end(self, reward):
 99 |         request_url = "http://{}:{}/episode_end".format(self._host, self._port)
100 |         try:
101 |             req = requests.post(
102 |                 request_url,
103 |                 timeout=0.5,
104 |                 json={
105 |                     "reward": json.dumps(reward, cls=utility.PommermanJSONEncoder)
106 |                 })
107 |         except requests.exceptions.Timeout as e:
108 |             print('Timeout in episode_end()!')
109 | 
110 |     def shutdown(self):
111 |         request_url = "http://{}:{}/shutdown".format(self._host, self._port)
112 |         try:
113 |             req = requests.post(
114 |                 request_url,
115 |                 timeout=0.5,
116 |                 json={ })
117 |         except requests.exceptions.Timeout as e:
118 |             print('Timeout in shutdown()!')
119 | 


--------------------------------------------------------------------------------
/pommerman/agents/player_agent.py:
--------------------------------------------------------------------------------
  1 | """
  2 | NOTE:
  3 | 
  4 | There are a few minor complications to fluid human control which make this
  5 | code a little more involved than trivial.
  6 | 
  7 | 1. Key press-release cycles can be, and often are, faster than one tick of
  8 |    the game/simulation, but the player still wants that cycle to count, i.e.
  9 |    to lay a bomb!
 10 | 2. When holding down a key, the player expects that action to be repeated,
 11 |    at least after a slight delay.
 12 | 3. But when holding a key down (say, move left) and simultaneously doing a
 13 |    quick press-release cycle (put a bomb), we want the held-down key to keep
 14 |    being executed, but the cycle should have happened in-between.
 15 | 
 16 | The way we solve this problem is by separating key-state and actions-to-do.
 17 | We hold the actions that need be executed in a queue (`self._action_q`) and
 18 | a state for all considered keys.
 19 | 
 20 | 1. When a key is pressed down, we note the time and mark it as down.
 21 | 2. If it is released quickly thereafter, before a game tick could happen,
 22 |    we add its action into the queue. This often happens when putting bombs.
 23 | 3. If it's still pressed down as we enter a game tick, we do some math to see
 24 |    if it's time for a "repeat" event and, if so, push an action to the queue.
 25 | 4. Just work off one item from the queue each tick.
 26 | 
 27 | This way, the input is "natural" and things like dropping a bomb while doing
 28 | a diagonal walk from one end to the other "just work".
 29 | """
 30 | 
 31 | from time import time
 32 | 
 33 | from . import BaseAgent
 34 | from .. import characters
 35 | 
 36 | REPEAT_DELAY = 0.2  # seconds
 37 | REPEAT_INTERVAL = 0.1
 38 | 
 39 | 
 40 | class Keystate:
 41 |     '''Handles keyboard state for a human player'''
 42 |     def __init__(self):
 43 |         self.keydown_time = time()
 44 |         self.last_repeat_time = None
 45 |         self.fired = False
 46 | 
 47 |     def should_fire(self):
 48 |         if self.last_repeat_time is None:
 49 |             # The first repetition:
 50 |             if time() - self.keydown_time > REPEAT_DELAY:
 51 |                 return True
 52 |         else:
 53 |             # A repetition after the first:
 54 |             if time() - self.last_repeat_time > REPEAT_INTERVAL:
 55 |                 return True
 56 | 
 57 |         # No repetition yet
 58 |         return False
 59 | 
 60 |     def mark_fired(self):
 61 |         self.last_repeat_time = time()
 62 |         self.fired = True
 63 | 
 64 | 
 65 | class PlayerAgent(BaseAgent):
 66 |     """The Player Agent that lets the user control a character."""
 67 | 
 68 |     def __init__(self, character=characters.Bomber, agent_control='arrows'):
 69 |         super(PlayerAgent, self).__init__(character)
 70 | 
 71 |         ##
 72 |         # @NOTE: DO NOT move this import outside the constructor. It will
 73 |         # not work in headless environments like a Docker container
 74 |         # and prevents Pommerman from running.
 75 |         #
 76 |         from pyglet.window import key
 77 |         controls = {
 78 |             'arrows': {
 79 |                 key.UP: 1,
 80 |                 key.DOWN: 2,
 81 |                 key.LEFT: 3,
 82 |                 key.RIGHT: 4,
 83 |                 key.SPACE: 5,
 84 |                 key.M: 6  # In Pommerman, this will freeze the game.
 85 |             },
 86 |             'wasd': {
 87 |                 key.W: 1,
 88 |                 key.S: 2,
 89 |                 key.A: 3,
 90 |                 key.D: 4,
 91 |                 key.E: 5,
 92 |                 key.Q: 6  # In Pommerman, this will freeze the game.
 93 |             }
 94 |         }
 95 | 
 96 |         assert agent_control in controls, "Unknown control: {}".format(
 97 |             agent_control)
 98 |         self._key2act = controls[agent_control]
 99 | 
100 |         self._action_q = []
101 |         self._keystate = {}
102 | 
103 |     def act(self, obs, action_space):
104 |         # Go through the keys and fire for those that needs repetition (because they're held down)
105 |         for k, state in self._keystate.items():
106 |             if state.should_fire():
107 |                 self._action_q.append(k)
108 |                 state.mark_fired()
109 | 
110 |         act = 0
111 |         if self._action_q:  # Work off the keys that are queued.
112 |             act = self._key2act[self._action_q.pop(0)]
113 |         return act
114 | 
115 |     @staticmethod
116 |     def has_user_input():
117 |         return True
118 | 
119 |     def on_key_press(self, k, mod):
120 |         # Ignore if we're not handling the key. Avoids "shadowing" ticks in
121 |         # multiplayer mode.
122 |         if k in self._key2act:
123 |             self._keystate[k] = Keystate()
124 | 
125 |     def on_key_release(self, k, mod):
126 |         # We only need to act on keys for which we did something in the
127 |         # `key_press` event, and ignore any other key releases.
128 |         if k in self._keystate:
129 |             # Only mark this as a "press" upon release if it was a quick one,
130 |             # i.e. not held down and executed already
131 |             if not self._keystate[k].fired:
132 |                 self._action_q.append(k)
133 |             del self._keystate[k]
134 | 


--------------------------------------------------------------------------------
/pommerman/agents/player_agent_blocking.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This variant is blocking, that is the game pauses for keyboard input.
 3 | """
 4 | 
 5 | from time import time
 6 | import click
 7 | 
 8 | from . import BaseAgent
 9 | from .. import characters
10 | from .. import constants
11 | 
12 | # keypad control codes
13 | K_PREFIX = '\x1b'
14 | K_RT = '[C'
15 | K_LF = '[D'
16 | K_UP = '[A'
17 | K_DN = '[B'
18 | 
19 | 
20 | class PlayerAgentBlocking(BaseAgent):
21 |     """Block for keyboard input."""
22 | 
23 |     def __init__(self, character=characters.Bomber, agent_control='arrows'):
24 |         super(PlayerAgentBlocking, self).__init__(character)
25 |         self.agent_control = agent_control
26 | 
27 |     def act(self, obs, action_space):
28 |         key = click.getchar()
29 |         if self.agent_control == 'arrows':
30 |             if key == K_RT + K_PREFIX: return constants.Action.Right.value
31 |             if key == K_LF + K_PREFIX: return constants.Action.Left.value
32 |             if key == K_UP + K_PREFIX: return constants.Action.Up.value
33 |             if key == K_DN + K_PREFIX: return constants.Action.Down.value
34 |             if key == ' ': return constants.Action.Bomb.value
35 |             return constants.Action.Stop.value
36 | 
37 |         if self.agent_control == 'wasd':
38 |             if key == 'd': return constants.Action.Right.value
39 |             if key == 'a': return constants.Action.Left.value
40 |             if key == 'w': return constants.Action.Up.value
41 |             if key == 's': return constants.Action.Down.value
42 |             if key == 'e': return constants.Action.Bomb.value
43 |             if key == 'q': return constants.Action.Stop.value
44 |             return constants.Action.Stop.value
45 | 


--------------------------------------------------------------------------------
/pommerman/agents/random_agent.py:
--------------------------------------------------------------------------------
 1 | '''An agent that preforms a random action each step'''
 2 | from . import BaseAgent
 3 | 
 4 | 
 5 | class RandomAgent(BaseAgent):
 6 |     """The Random Agent that returns random actions given an action_space."""
 7 | 
 8 |     def act(self, obs, action_space):
 9 |         return action_space.sample()
10 | 


--------------------------------------------------------------------------------
/pommerman/agents/tensorforce_agent.py:
--------------------------------------------------------------------------------
 1 | """
 2 | A Work-In-Progress agent using Tensorforce
 3 | """
 4 | from . import BaseAgent
 5 | from .. import characters
 6 | 
 7 | 
 8 | class TensorForceAgent(BaseAgent):
 9 |     """The TensorForceAgent. Acts through the algorith, not here."""
10 | 
11 |     def __init__(self, character=characters.Bomber, algorithm='ppo'):
12 |         super(TensorForceAgent, self).__init__(character)
13 |         self.algorithm = algorithm
14 | 
15 |     def act(self, obs, action_space):
16 |         """This agent has its own way of inducing actions. See train_with_tensorforce."""
17 |         return None
18 | 
19 |     def initialize(self, env):
20 |         from gym import spaces
21 |         from tensorforce.agents import PPOAgent
22 | 
23 |         if self.algorithm == "ppo":
24 |             if type(env.action_space) == spaces.Tuple:
25 |                 actions = {
26 |                     str(num): {
27 |                         'type': 'int',
28 |                         'num_actions': space.n
29 |                     }
30 |                     for num, space in enumerate(env.action_space.spaces)
31 |                 }
32 |             else:
33 |                 actions = dict(type='int', num_actions=env.action_space.n)
34 | 
35 |             return PPOAgent(
36 |                 states=dict(type='float', shape=env.observation_space.shape),
37 |                 actions=actions,
38 |                 network=[
39 |                     dict(type='dense', size=64),
40 |                     dict(type='dense', size=64)
41 |                 ],
42 |                 batching_capacity=1000,
43 |                 step_optimizer=dict(type='adam', learning_rate=1e-4))
44 |         return None
45 | 


--------------------------------------------------------------------------------
/pommerman/characters.py:
--------------------------------------------------------------------------------
  1 | """These are objects in the game world. Please do not edit this file. The competition will be run with it as is."""
  2 | 
  3 | import random
  4 | 
  5 | from . import constants
  6 | from . import utility
  7 | 
  8 | 
  9 | class Bomber(object):
 10 |     """Container to keep the agent state."""
 11 | 
 12 |     def __init__(self, agent_id=None, game_type=None):
 13 |         self._game_type = game_type
 14 |         self.ammo = 1
 15 |         self.is_alive = True
 16 |         self.blast_strength = constants.DEFAULT_BLAST_STRENGTH
 17 |         self.can_kick = False
 18 |         if agent_id is not None:
 19 |             self.set_agent_id(agent_id)
 20 | 
 21 |     def set_agent_id(self, agent_id):
 22 |         self.agent_id = agent_id
 23 |         if self._game_type == constants.GameType.FFA:
 24 |             self.teammate = constants.Item.AgentDummy
 25 |             self.enemies = [
 26 |                 getattr(constants.Item, f'Agent{id_}')
 27 |                 for id_ in range(4)
 28 |                 if id_ != agent_id
 29 |             ]
 30 |         elif self._game_type == constants.GameType.OneVsOne:
 31 |             self.teammate = constants.Item.AgentDummy
 32 |             self.enemies = [
 33 |                 getattr(constants.Item, f'Agent{id_}')
 34 |                 for id_ in range(2)
 35 |                 if id_ != agent_id
 36 |             ]
 37 |         else:
 38 |             teammate_id = (agent_id + 2) % 4
 39 |             self.teammate = getattr(constants.Item, f'Agent{teammate_id}')
 40 |             self.enemies = [
 41 |                 getattr(constants.Item, f'Agent{id_}')
 42 |                 for id_ in range(4)
 43 |                 if id_ != agent_id and id_ != teammate_id
 44 |             ]
 45 |             self.enemies.append(constants.Item.AgentDummy)
 46 | 
 47 |     def maybe_lay_bomb(self):
 48 |         if self.ammo > 0:
 49 |             self.ammo -= 1
 50 |             return Bomb(self, self.position, constants.DEFAULT_BOMB_LIFE + 1,
 51 |                         self.blast_strength)
 52 |         return None
 53 | 
 54 |     def incr_ammo(self):
 55 |         self.ammo = min(self.ammo + 1, 10)
 56 | 
 57 |     def get_next_position(self, direction):
 58 |         action = constants.Action(direction)
 59 |         return utility.get_next_position(self.position, action)
 60 | 
 61 |     def move(self, direction):
 62 |         self.position = self.get_next_position(direction)
 63 | 
 64 |     def stop(self):
 65 |         pass
 66 | 
 67 |     def in_range(self, exploded_map):
 68 |         row, col = self.position
 69 |         return exploded_map[row][col] == 1
 70 | 
 71 |     def die(self):
 72 |         self.is_alive = False
 73 | 
 74 |     def set_start_position(self, start_position):
 75 |         self.start_position = start_position
 76 | 
 77 |     def reset(self, ammo=1, is_alive=True, blast_strength=None, can_kick=False):
 78 |         self.position = self.start_position
 79 |         self.ammo = ammo
 80 |         self.is_alive = is_alive
 81 |         self.blast_strength = blast_strength or constants.DEFAULT_BLAST_STRENGTH
 82 |         self.can_kick = can_kick
 83 | 
 84 |     def pick_up(self, item, max_blast_strength):
 85 |         if item == constants.Item.ExtraBomb:
 86 |             self.incr_ammo()
 87 |         elif item == constants.Item.IncrRange:
 88 |             self.blast_strength = min(self.blast_strength + 1,
 89 |                                       max_blast_strength)
 90 |         elif item == constants.Item.Kick:
 91 |             self.can_kick = True
 92 | 
 93 |     def to_json(self):
 94 |         return {
 95 |             "agent_id": self.agent_id,
 96 |             "is_alive": self.is_alive,
 97 |             "position": self.position,
 98 |             "ammo": self.ammo,
 99 |             "blast_strength": self.blast_strength,
100 |             "can_kick": self.can_kick
101 |         }
102 | 
103 | 
104 | class Bomb(object):
105 |     """Container for the Bomb object."""
106 | 
107 |     def __init__(self,
108 |                  bomber,
109 |                  position,
110 |                  life,
111 |                  blast_strength,
112 |                  moving_direction=None):
113 |         self.bomber = bomber
114 |         self.position = position
115 |         self.life = life
116 |         self.blast_strength = blast_strength
117 |         self.moving_direction = moving_direction
118 | 
119 |     def tick(self):
120 |         self.life -= 1
121 | 
122 |     def fire(self):
123 |         """Encounter Flames and blow up."""
124 |         self.life = 0
125 | 
126 |     def move(self):
127 |         if self.is_moving():
128 |             self.position = utility.get_next_position(self.position,
129 |                                                       self.moving_direction)
130 | 
131 |     def stop(self):
132 |         self.moving_direction = None
133 | 
134 |     def exploded(self):
135 |         return self.life == 0
136 | 
137 |     def explode(self):
138 |         row, col = self.position
139 |         indices = {
140 |             'up': ([row - i, col] for i in range(1, self.blast_strength)),
141 |             'down': ([row + i, col] for i in range(self.blast_strength)),
142 |             'left': ([row, col - i] for i in range(1, self.blast_strength)),
143 |             'right': ([row, col + i] for i in range(1, self.blast_strength))
144 |         }
145 |         return indices
146 | 
147 |     def in_range(self, exploded_map):
148 |         row, col = self.position
149 |         return exploded_map[row][col] == 1
150 | 
151 |     def is_moving(self):
152 |         return self.moving_direction is not None
153 | 
154 |     def to_json(self):
155 |         return {
156 |             "position": self.position,
157 |             "bomber_id": self.bomber.agent_id,
158 |             "life": self.life,
159 |             "blast_strength": self.blast_strength,
160 |             "moving_direction": self.moving_direction
161 |         }
162 | 
163 | 
164 | class Flame(object):
165 |     """Container for Flame object."""
166 | 
167 |     def __init__(self, position, life=2):
168 |         self.position = position
169 |         self.life = life
170 | 
171 |     def tick(self):
172 |         self.life -= 1
173 | 
174 |     def is_dead(self):
175 |         return self.life == 0
176 | 
177 |     def to_json(self):
178 |         return {"position": self.position, "life": self.life}
179 | 


--------------------------------------------------------------------------------
/pommerman/cli/__init__.py:
--------------------------------------------------------------------------------
1 | '''CLI module entry point'''
2 | from . import run_battle
3 | 


--------------------------------------------------------------------------------
/pommerman/cli/run_battle.py:
--------------------------------------------------------------------------------
  1 | """Run a battle among agents.
  2 | 
  3 | Call this with a config, a game, and a list of agents. The script will start separate threads to operate the agents
  4 | and then report back the result.
  5 | 
  6 | An example with all four test agents running ffa:
  7 | python run_battle.py --agents=test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent --config=PommeFFACompetition-v0
  8 | 
  9 | An example with one player, two random agents, and one test agent:
 10 | python run_battle.py --agents=player::arrows,test::agents.SimpleAgent,random::null,random::null --config=PommeFFACompetition-v0
 11 | 
 12 | An example with a docker agent:
 13 | python run_battle.py --agents=player::arrows,docker::pommerman/test-agent,random::null,random::null --config=PommeFFACompetition-v0
 14 | """
 15 | import atexit
 16 | from datetime import datetime
 17 | import os
 18 | import random
 19 | import sys
 20 | import time
 21 | 
 22 | import argparse
 23 | import numpy as np
 24 | 
 25 | from .. import helpers
 26 | from .. import make
 27 | from pommerman import utility
 28 | 
 29 | 
 30 | def run(args, num_times=1, seed=None):
 31 |     '''Wrapper to help start the game'''
 32 |     config = args.config
 33 |     record_pngs_dir = args.record_pngs_dir
 34 |     record_json_dir = args.record_json_dir
 35 |     agent_env_vars = args.agent_env_vars
 36 |     game_state_file = args.game_state_file
 37 |     render_mode = args.render_mode
 38 |     do_sleep = args.do_sleep
 39 | 
 40 |     agents = [
 41 |         helpers.make_agent_from_string(agent_string, agent_id)
 42 |         for agent_id, agent_string in enumerate(args.agents.split(','))
 43 |     ]
 44 | 
 45 |     env = make(config, agents, game_state_file, render_mode=render_mode)
 46 | 
 47 |     def _run(record_pngs_dir=None, record_json_dir=None):
 48 |         '''Runs a game'''
 49 |         print("Starting the Game.")
 50 |         if record_pngs_dir and not os.path.isdir(record_pngs_dir):
 51 |             os.makedirs(record_pngs_dir)
 52 |         if record_json_dir and not os.path.isdir(record_json_dir):
 53 |             os.makedirs(record_json_dir)
 54 | 
 55 |         obs = env.reset()
 56 |         done = False
 57 | 
 58 |         while not done:
 59 |             if args.render:
 60 |                 env.render(
 61 |                     record_pngs_dir=record_pngs_dir,
 62 |                     record_json_dir=record_json_dir,
 63 |                     do_sleep=do_sleep)
 64 |             if args.render is False and record_json_dir:
 65 |                 env.save_json(record_json_dir)
 66 |                 time.sleep(1.0 / env._render_fps)
 67 |             actions = env.act(obs)
 68 |             obs, reward, done, info = env.step(actions)
 69 | 
 70 |         print("Final Result: ", info)
 71 |         if args.render:
 72 |             env.render(
 73 |                 record_pngs_dir=record_pngs_dir,
 74 |                 record_json_dir=record_json_dir,
 75 |                 do_sleep=do_sleep)
 76 |             if do_sleep:
 77 |                 time.sleep(5)
 78 |             env.render(close=True)
 79 |             
 80 |         if args.render is False and record_json_dir:
 81 |             env.save_json(record_json_dir)
 82 |             time.sleep(1.0 / env._render_fps)
 83 | 
 84 |         if record_json_dir:
 85 |             finished_at = datetime.now().isoformat()
 86 |             _agents = args.agents.split(',')
 87 |             utility.join_json_state(record_json_dir, _agents, finished_at,
 88 |                                     config, info)
 89 | 
 90 |         return info
 91 | 
 92 |     if seed is None:
 93 |         # Pick a random seed between 0 and 2^31 - 1
 94 |         seed = random.randint(0, np.iinfo(np.int32).max)
 95 |     np.random.seed(seed)
 96 |     random.seed(seed)
 97 |     env.seed(seed)
 98 | 
 99 |     infos = []
100 |     times = []
101 |     for i in range(num_times):
102 |         start = time.time()
103 | 
104 |         record_pngs_dir_ = record_pngs_dir + '/%d' % (i+1) \
105 |                            if record_pngs_dir else None
106 |         record_json_dir_ = record_json_dir + '/%d' % (i+1) \
107 |                            if record_json_dir else None
108 |         infos.append(_run(record_pngs_dir_, record_json_dir_))
109 | 
110 |         times.append(time.time() - start)
111 |         print("Game Time: ", times[-1])
112 | 
113 |     atexit.register(env.close)
114 |     return infos
115 | 
116 | 
117 | def main():
118 |     '''CLI entry pointed used to bootstrap a battle'''
119 |     simple_agent = 'test::agents.SimpleAgent'
120 |     player_agent = 'player::arrows'
121 |     docker_agent = 'docker::pommerman/simple-agent'
122 | 
123 |     parser = argparse.ArgumentParser(description='Playground Flags.')
124 |     parser.add_argument(
125 |         '--config',
126 |         default='PommeFFACompetition-v0',
127 |         help='Configuration to execute. See env_ids in '
128 |         'configs.py for options.')
129 |     parser.add_argument(
130 |         '--agents',
131 |         default=','.join([simple_agent] * 4),
132 |         # default=','.join([player_agent] + [simple_agent]*3]),
133 |         # default=','.join([docker_agent] + [simple_agent]*3]),
134 |         help='Comma delineated list of agent types and docker '
135 |         'locations to run the agents.')
136 |     parser.add_argument(
137 |         '--agent_env_vars',
138 |         help='Comma delineated list of agent environment vars '
139 |         'to pass to Docker. This is only for the Docker Agent.'
140 |         " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
141 |         'would send two arguments to Docker Agent 0 and one '
142 |         'to Docker Agent 3.',
143 |         default="")
144 |     parser.add_argument(
145 |         '--record_pngs_dir',
146 |         default=None,
147 |         help='Directory to record the PNGs of the game. '
148 |         "Doesn't record if None.")
149 |     parser.add_argument(
150 |         '--record_json_dir',
151 |         default=None,
152 |         help='Directory to record the JSON representations of '
153 |         "the game. Doesn't record if None.")
154 |     parser.add_argument(
155 |         "--render",
156 |         default=False,
157 |         action='store_true',
158 |         help="Whether to render or not. Defaults to False.")
159 |     parser.add_argument(
160 |         '--render_mode',
161 |         default='human',
162 |         help="What mode to render. Options are human, rgb_pixel, and rgb_array")
163 |     parser.add_argument(
164 |         '--game_state_file',
165 |         default=None,
166 |         help="File from which to load game state.")
167 |     parser.add_argument(
168 |         '--do_sleep',
169 |         default=True,
170 |         help="Whether we sleep after each rendering.")
171 |     args = parser.parse_args()
172 |     run(args)
173 | 
174 | 
175 | if __name__ == "__main__":
176 |     main()
177 | 


--------------------------------------------------------------------------------
/pommerman/cli/train_with_tensorforce.py:
--------------------------------------------------------------------------------
  1 | """Train an agent with TensorForce.
  2 | 
  3 | Call this with a config, a game, and a list of agents, one of which should be a
  4 | tensorforce agent. The script will start separate threads to operate the agents
  5 | and then report back the result.
  6 | 
  7 | An example with all three simple agents running ffa:
  8 | python train_with_tensorforce.py \
  9 |  --agents=tensorforce::ppo,test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent \
 10 |  --config=PommeFFACompetition-v0
 11 | """
 12 | import atexit
 13 | import functools
 14 | import os
 15 | 
 16 | import argparse
 17 | import docker
 18 | from tensorforce.execution import Runner
 19 | from tensorforce.contrib.openai_gym import OpenAIGym
 20 | import gym
 21 | 
 22 | from pommerman import helpers, make
 23 | from pommerman.agents import TensorForceAgent
 24 | 
 25 | 
 26 | CLIENT = docker.from_env()
 27 | 
 28 | 
 29 | def clean_up_agents(agents):
 30 |     """Stops all agents"""
 31 |     return [agent.shutdown() for agent in agents]
 32 | 
 33 | 
 34 | class WrappedEnv(OpenAIGym):
 35 |     '''An Env Wrapper used to make it easier to work
 36 |     with multiple agents'''
 37 | 
 38 |     def __init__(self, gym, visualize=False):
 39 |         self.gym = gym
 40 |         self.visualize = visualize
 41 | 
 42 |     def execute(self, action):
 43 |         if self.visualize:
 44 |             self.gym.render()
 45 | 
 46 |         actions = self.unflatten_action(action=action)
 47 | 
 48 |         obs = self.gym.get_observations()
 49 |         all_actions = self.gym.act(obs)
 50 |         all_actions.insert(self.gym.training_agent, actions)
 51 |         state, reward, terminal, _ = self.gym.step(all_actions)
 52 |         agent_state = self.gym.featurize(state[self.gym.training_agent])
 53 |         agent_reward = reward[self.gym.training_agent]
 54 |         return agent_state, terminal, agent_reward
 55 | 
 56 |     def reset(self):
 57 |         obs = self.gym.reset()
 58 |         agent_obs = self.gym.featurize(obs[3])
 59 |         return agent_obs
 60 | 
 61 | 
 62 | def main():
 63 |     '''CLI interface to bootstrap taining'''
 64 |     parser = argparse.ArgumentParser(description="Playground Flags.")
 65 |     parser.add_argument("--game", default="pommerman", help="Game to choose.")
 66 |     parser.add_argument(
 67 |         "--config",
 68 |         default="PommeFFACompetition-v0",
 69 |         help="Configuration to execute. See env_ids in "
 70 |         "configs.py for options.")
 71 |     parser.add_argument(
 72 |         "--agents",
 73 |         default="tensorforce::ppo,test::agents.SimpleAgent,"
 74 |         "test::agents.SimpleAgent,test::agents.SimpleAgent",
 75 |         help="Comma delineated list of agent types and docker "
 76 |         "locations to run the agents.")
 77 |     parser.add_argument(
 78 |         "--agent_env_vars",
 79 |         help="Comma delineated list of agent environment vars "
 80 |         "to pass to Docker. This is only for the Docker Agent."
 81 |         " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
 82 |         "would send two arguments to Docker Agent 0 and one to"
 83 |         " Docker Agent 3.",
 84 |         default="")
 85 |     parser.add_argument(
 86 |         "--record_pngs_dir",
 87 |         default=None,
 88 |         help="Directory to record the PNGs of the game. "
 89 |         "Doesn't record if None.")
 90 |     parser.add_argument(
 91 |         "--record_json_dir",
 92 |         default=None,
 93 |         help="Directory to record the JSON representations of "
 94 |         "the game. Doesn't record if None.")
 95 |     parser.add_argument(
 96 |         "--render",
 97 |         default=False,
 98 |         action='store_true',
 99 |         help="Whether to render or not. Defaults to False.")
100 |     parser.add_argument(
101 |         "--game_state_file",
102 |         default=None,
103 |         help="File from which to load game state. Defaults to "
104 |         "None.")
105 |     args = parser.parse_args()
106 | 
107 |     config = args.config
108 |     record_pngs_dir = args.record_pngs_dir
109 |     record_json_dir = args.record_json_dir
110 |     agent_env_vars = args.agent_env_vars
111 |     game_state_file = args.game_state_file
112 | 
113 |     # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
114 |     #       this is still missing the docker_env_dict parsing for the agents.
115 |     agents = [
116 |         helpers.make_agent_from_string(agent_string, agent_id + 1000)
117 |         for agent_id, agent_string in enumerate(args.agents.split(","))
118 |     ]
119 | 
120 |     env = make(config, agents, game_state_file)
121 |     training_agent = None
122 | 
123 |     for agent in agents:
124 |         if type(agent) == TensorForceAgent:
125 |             training_agent = agent
126 |             env.set_training_agent(agent.agent_id)
127 |             break
128 | 
129 |     if args.record_pngs_dir:
130 |         assert not os.path.isdir(args.record_pngs_dir)
131 |         os.makedirs(args.record_pngs_dir)
132 |     if args.record_json_dir:
133 |         assert not os.path.isdir(args.record_json_dir)
134 |         os.makedirs(args.record_json_dir)
135 | 
136 |     # Create a Proximal Policy Optimization agent
137 |     agent = training_agent.initialize(env)
138 | 
139 |     atexit.register(functools.partial(clean_up_agents, agents))
140 |     wrapped_env = WrappedEnv(env, visualize=args.render)
141 |     runner = Runner(agent=agent, environment=wrapped_env)
142 |     runner.run(episodes=10, max_episode_timesteps=2000)
143 |     print("Stats: ", runner.episode_rewards, runner.episode_timesteps,
144 |           runner.episode_times)
145 | 
146 |     try:
147 |         runner.close()
148 |     except AttributeError as e:
149 |         pass
150 | 
151 | 
152 | if __name__ == "__main__":
153 |     main()
154 | 


--------------------------------------------------------------------------------
/pommerman/constants.py:
--------------------------------------------------------------------------------
  1 | """The set of constants in the game.
  2 | 
  3 | This includes not just ints but also classes like Item, GameType, Action, etc.
  4 | """
  5 | from enum import Enum
  6 | 
  7 | RENDER_FPS = 15
  8 | BOARD_SIZE = 11
  9 | NUM_RIGID = 36
 10 | NUM_WOOD = 36
 11 | NUM_ITEMS = 20
 12 | BOARD_SIZE_ONE_VS_ONE = 8
 13 | NUM_RIGID_ONE_VS_ONE = 16
 14 | NUM_WOOD_ONE_VS_ONE = 16
 15 | NUM_ITEMS_ONE_VS_ONE = 10
 16 | AGENT_VIEW_SIZE = 4
 17 | HUMAN_FACTOR = 32
 18 | DEFAULT_BLAST_STRENGTH = 2
 19 | DEFAULT_BOMB_LIFE = 9
 20 | # color for each of the 4 agents
 21 | AGENT_COLORS = [[231, 76, 60], [46, 139, 87], [65, 105, 225], [238, 130, 238]]
 22 | # color for each of the items.
 23 | ITEM_COLORS = [[240, 248, 255], [128, 128, 128], [210, 180, 140],
 24 |                [255, 153, 51], [241, 196, 15], [141, 137, 124]]
 25 | ITEM_COLORS += [(153, 153, 255), (153, 204, 204), (97, 169, 169), (48, 117,
 26 |                                                                    117)]
 27 | # If using collapsing boards, the step at which the board starts to collapse.
 28 | FIRST_COLLAPSE = 500
 29 | MAX_STEPS = 800
 30 | RADIO_VOCAB_SIZE = 8
 31 | RADIO_NUM_WORDS = 2
 32 | 
 33 | # Files for images and and fonts
 34 | RESOURCE_DIR = 'resources/'
 35 | FILE_NAMES = [
 36 |     'Passage', 'Rigid', 'Wood', 'Bomb', 'Flames', 'Fog', 'ExtraBomb',
 37 |     'IncrRange', 'Kick', 'AgentDummy', 'Agent0', 'Agent1', 'Agent2', 'Agent3',
 38 |     'AgentDummy-No-Background', 'Agent0-No-Background', 'Agent1-No-Background',
 39 |     'Agent2-No-Background', 'Agent3-No-Background', 'X-No-Background',
 40 |     'Agent0-Team', 'Agent1-Team', 'Agent2-Team', 'Agent3-Team',
 41 |     'Agent0-Team-No-Background', 'Agent1-Team-No-Background', 
 42 |     'Agent2-Team-No-Background', 'Agent3-Team-No-Background',
 43 | ]
 44 | IMAGES_DICT = {
 45 |     num: {
 46 |         'id': num,
 47 |         'file_name': '%s.png' % file_name,
 48 |         'name': file_name,
 49 |         'image': None
 50 |     } for num, file_name in enumerate(FILE_NAMES)
 51 | }
 52 | BOMB_FILE_NAMES = [
 53 |     'Bomb-1', 'Bomb-2', 'Bomb-3', 'Bomb-4', 'Bomb-5', 'Bomb-6',
 54 |     'Bomb-7', 'Bomb-8', 'Bomb-9', 'Bomb-10',
 55 | ]
 56 | BOMB_DICT = {
 57 |     num: {
 58 |         'id': num,
 59 |         'file_name': '%s.png' % file_name,
 60 |         'name': file_name,
 61 |         'image': None
 62 |     } for num, file_name in enumerate(BOMB_FILE_NAMES)
 63 | }
 64 | FONTS_FILE_NAMES = ['Cousine-Regular.ttf']
 65 | 
 66 | # Human view board configurations
 67 | BORDER_SIZE = 20
 68 | MARGIN_SIZE = 10
 69 | TILE_SIZE = 50
 70 | BACKGROUND_COLOR = (41, 39, 51, 255)
 71 | TILE_COLOR = (248, 221, 82, 255)
 72 | TEXT_COLOR = (170, 170, 170, 255)
 73 | 
 74 | 
 75 | class Item(Enum):
 76 |     """The Items in the game.
 77 | 
 78 |     When picked up:
 79 |       - ExtraBomb increments the agent's ammo by 1.
 80 |       - IncrRange increments the agent's blast strength by 1.
 81 |       - Kick grants the agent the ability to kick items.
 82 | 
 83 |     AgentDummy is used by team games to denote the third enemy and by ffa to
 84 |     denote the teammate.
 85 |     """
 86 |     Passage = 0
 87 |     Rigid = 1
 88 |     Wood = 2
 89 |     Bomb = 3
 90 |     Flames = 4
 91 |     Fog = 5
 92 |     ExtraBomb = 6
 93 |     IncrRange = 7
 94 |     Kick = 8
 95 |     AgentDummy = 9
 96 |     Agent0 = 10
 97 |     Agent1 = 11
 98 |     Agent2 = 12
 99 |     Agent3 = 13
100 | 
101 | 
102 | class GameType(Enum):
103 |     """The Game Types.
104 | 
105 |     FFA: 1v1v1v1. Submit an agent; it competes against other submitted agents.
106 |     Team: 2v2. Submit an agent; it is matched up randomly with another agent
107 |       and together take on two other similarly matched agents.
108 |     TeamRadio: 2v2. Submit two agents; they are matched up against two other
109 |       agents. Each team passes discrete communications to each other.
110 |     OneVsOne: 1v1. A research environment for dueling between two agents
111 |     """
112 |     FFA = 1
113 |     Team = 2
114 |     TeamRadio = 3
115 |     OneVsOne = 4
116 | 
117 | 
118 | class Action(Enum):
119 |     '''The Actions an agent can take'''
120 |     Stop = 0
121 |     Up = 1
122 |     Down = 2
123 |     Left = 3
124 |     Right = 4
125 |     Bomb = 5
126 | 
127 | 
128 | class Result(Enum):
129 |     '''The results available for the end of the game'''
130 |     Win = 0
131 |     Loss = 1
132 |     Tie = 2
133 |     Incomplete = 3
134 | 
135 | 
136 | class InvalidAction(Exception):
137 |     '''Invalid Actions Exception'''
138 |     pass
139 | 


--------------------------------------------------------------------------------
/pommerman/envs/__init__.py:
--------------------------------------------------------------------------------
1 | '''Allows each evn to be accessed through this module.'''
2 | from . import v0
3 | from . import v1
4 | from . import v2
5 | 


--------------------------------------------------------------------------------
/pommerman/envs/v1.py:
--------------------------------------------------------------------------------
  1 | """The Pommerman v1 Environment, which implements a collapsing board.
  2 | 
  3 | This environment is the same as v0.py, except that the board will collapse
  4 | according to a uniform schedule beginning at the kwarg first_collapse.
  5 | 
  6 | The collapsing works in the following manner:
  7 | 1. Set the collapsing schedule. This is uniform starting at step first_collapse
  8 |    and ending at step max_steps.
  9 | 2. Number the rings on the board from 0 to board_size-1 s.t. the outermost ring
 10 |    is 0 and the innermost ring is board_size-1. The cells in the former are
 11 |    [[0, i], [i, 0], [board_size-1, i], [i, board_size-1] for i in
 12 |    [0, board_size-1]] and the latter is, assuming an odd board_size,
 13 |    [[(board_size-1)/2, (board_size-1)/2]].
 14 | 3. When we are at a step in the collapsing schedule, we take the matching ring
 15 |    and turn it into rigid walls. This has the effect of destroying any items,
 16 |    bombs (which don't go off), and agents in those squares.
 17 | """
 18 | from .. import constants
 19 | from .. import utility
 20 | from . import v0
 21 | 
 22 | 
 23 | class Pomme(v0.Pomme):
 24 |     '''The second hardest pommerman env. v1 addes a collapsing board.'''
 25 |     metadata = {
 26 |         'render.modes': ['human', 'rgb_array', 'rgb_pixel'],
 27 |         'video.frames_per_second': constants.RENDER_FPS
 28 |     }
 29 | 
 30 |     def __init__(self, *args, **kwargs):
 31 |         super().__init__(*args, **kwargs)
 32 |         first_collapse = kwargs.get('first_collapse')
 33 |         self.collapses = list(
 34 |             range(first_collapse, self._max_steps,
 35 |                   int((self._max_steps - first_collapse) / 4)))
 36 | 
 37 |     def _collapse_board(self, ring):
 38 |         """Collapses the board at a certain ring radius.
 39 | 
 40 |         For example, if the board is 13x13 and ring is 0, then the the ring of
 41 |         the first row, last row, first column, and last column is all going to
 42 |         be turned into rigid walls. All agents in that ring die and all bombs
 43 |         are removed without detonating.
 44 |         
 45 |         For further rings, the values get closer to the center.
 46 | 
 47 |         Args:
 48 |           ring: Integer value of which cells to collapse.
 49 |         """
 50 |         board = self._board.copy()
 51 | 
 52 |         def collapse(r, c):
 53 |             '''Handles the collapsing of the board. Will
 54 |             kill of remove any item/agent that is on the
 55 |             collapsing tile.'''
 56 |             if utility.position_is_agent(board, (r, c)):
 57 |                 # Agent. Kill it.
 58 |                 num_agent = board[r][c] - constants.Item.Agent0.value
 59 |                 agent = self._agents[num_agent]
 60 |                 agent.die()
 61 |             if utility.position_is_bomb(self._bombs, (r, c)):
 62 |                 # Bomb. Remove the bomb. Update agent's ammo tally.
 63 |                 new_bombs = []
 64 |                 for b in self._bombs:
 65 |                     if b.position == (r, c):
 66 |                         b.bomber.incr_ammo()
 67 |                     else:
 68 |                         new_bombs.append(b)
 69 |                 self._bombs = new_bombs
 70 |             if utility.position_is_flames(board, (r, c)):
 71 |                 self._flames = [f for f in self._flames if f.position != (r,c)]
 72 |             if (r, c) in self._items:
 73 |                 # Item. Remove the item.
 74 |                 del self._items[(r, c)]
 75 |             board[r][c] = constants.Item.Rigid.value
 76 | 
 77 |         for cell in range(ring, self._board_size - ring):
 78 |             collapse(ring, cell)
 79 |             if ring != cell:
 80 |                 collapse(cell, ring)
 81 | 
 82 |             end = self._board_size - ring - 1
 83 |             collapse(end, cell)
 84 |             if end != cell:
 85 |                 collapse(cell, end)
 86 | 
 87 |         return board
 88 | 
 89 |     def get_json_info(self):
 90 |         ret = super().get_json_info()
 91 |         ret['collapses'] = json.dumps(self.collapses, cls=json_encoder)
 92 |         return ret
 93 | 
 94 |     def set_json_info(self):
 95 |         super().set_json_info()
 96 |         self.collapses = json.loads(self._init_game_state['collapses'])
 97 | 
 98 |     def step(self, actions):
 99 |         obs, reward, done, info = super().step(actions)
100 | 
101 |         for ring, collapse in enumerate(self.collapses):
102 |             if self._step_count == collapse:
103 |                 self._board = self._collapse_board(ring)
104 |                 break
105 | 
106 |         return obs, reward, done, info
107 | 


--------------------------------------------------------------------------------
/pommerman/envs/v2.py:
--------------------------------------------------------------------------------
  1 | """The Pommerman v2 Environment, which has communication across the agents.
  2 | 
  3 | The communication works by allowing each agent to send a vector of
  4 | radio_num_words (default = 2) from a vocabulary of size radio_vocab_size
  5 | (default = 8) to its teammate each turn. These vectors are passed into the
  6 | observation stream for each agent.
  7 | """
  8 | from gym import spaces
  9 | import numpy as np
 10 | import json
 11 | 
 12 | from .. import constants
 13 | from .. import utility
 14 | from . import v0
 15 | 
 16 | 
 17 | class Pomme(v0.Pomme):
 18 |     '''The hardest pommerman environment. This class expands env v0 
 19 |     adding communication between agents.'''
 20 |     metadata = {
 21 |         'render.modes': ['human', 'rgb_array', 'rgb_pixel'],
 22 |         'video.frames_per_second': constants.RENDER_FPS
 23 |     }
 24 | 
 25 |     def __init__(self, *args, **kwargs):
 26 |         self._radio_vocab_size = kwargs.get('radio_vocab_size')
 27 |         self._radio_num_words = kwargs.get('radio_num_words')
 28 |         if (self._radio_vocab_size and
 29 |                 not self._radio_num_words) or (not self._radio_vocab_size and
 30 |                                                self._radio_num_words):
 31 |             assert ("Include both radio_vocab_size and radio_num_words.")
 32 | 
 33 |         self._radio_from_agent = {
 34 |             agent: (0, 0)
 35 |             for agent in [
 36 |                 constants.Item.Agent0, constants.Item.Agent1,
 37 |                 constants.Item.Agent2, constants.Item.Agent3
 38 |             ]
 39 |         }
 40 |         super().__init__(*args, **kwargs)
 41 | 
 42 |     def _set_action_space(self):
 43 |         self.action_space = spaces.Tuple(
 44 |             tuple([spaces.Discrete(6)] +
 45 |                   [spaces.Discrete(self._radio_vocab_size
 46 |                                   )] * self._radio_num_words))
 47 | 
 48 |     def _set_observation_space(self):
 49 |         """The Observation Space for each agent.
 50 | 
 51 |         Total observatiosn: 3*board_size^2 + 12 + radio_vocab_size * radio_num_words:
 52 |         - all of the board (board_size^2)
 53 |         - bomb blast strength (board_size^2).
 54 |         - bomb life (board_size^2)
 55 |         - agent's position (2)
 56 |         - player ammo counts (1)
 57 |         - blast strength (1)
 58 |         - can_kick (1)
 59 |         - teammate (one of {AgentDummy.value, Agent3.value}).
 60 |         - enemies (three of {AgentDummy.value, Agent3.value}).
 61 |         - radio (radio_vocab_size * radio_num_words)
 62 |         """
 63 |         bss = self._board_size**2
 64 |         min_obs = [0] * 3 * bss + [0] * 5 + [constants.Item.AgentDummy.value
 65 |                                             ] * 4
 66 |         max_obs = [len(constants.Item)] * bss + [self._board_size
 67 |                                                 ] * bss + [25] * bss
 68 |         max_obs += [self._board_size] * 2 + [self._num_items] * 2 + [1]
 69 |         max_obs += [constants.Item.Agent3.value] * 4
 70 |         min_obs.extend([0] * self._radio_vocab_size * self._radio_num_words)
 71 |         max_obs.extend([1] * self._radio_vocab_size * self._radio_num_words)
 72 |         self.observation_space = spaces.Box(
 73 |             np.array(min_obs), np.array(max_obs))
 74 | 
 75 |     def get_observations(self):
 76 |         observations = super().get_observations()
 77 |         for obs in observations:
 78 |             obs['message'] = self._radio_from_agent[obs['teammate']]
 79 | 
 80 |         self.observations = observations
 81 |         return observations
 82 | 
 83 |     def step(self, actions):
 84 |         personal_actions = []
 85 |         radio_actions = []
 86 |         for agent_actions, agent in zip(actions, self._agents):
 87 |             if type(agent_actions) == int or not agent.is_alive:
 88 |                 personal_actions.append(agent_actions)
 89 |                 radio_actions.append((0, 0))
 90 |             elif type(agent_actions) in [tuple, list]:
 91 |                 personal_actions.append(agent_actions[0])
 92 |                 radio_actions.append(
 93 |                     tuple(agent_actions[1:(1+self._radio_num_words)]))
 94 |             else:
 95 |                 raise
 96 | 
 97 |             self._radio_from_agent[getattr(
 98 |                 constants.Item, 'Agent%d' % agent.agent_id)] = radio_actions[-1]
 99 | 
100 |         return super().step(personal_actions)
101 | 
102 |     @staticmethod
103 |     def featurize(obs):
104 |         ret = super().featurize(obs)
105 |         message = obs['message']
106 |         message = utility.make_np_float(message)
107 |         return np.concatenate((ret, message))
108 | 
109 |     def get_json_info(self, json_encoder=utility.PommermanJSONEncoder):
110 |         ret = super().get_json_info()
111 |         ret['radio_vocab_size'] = json.dumps(
112 |             self._radio_vocab_size, cls=json_encoder)
113 |         ret['radio_num_words'] = json.dumps(
114 |             self._radio_num_words, cls=json_encoder)
115 | 
116 |         # enum to json dict
117 |         radio_from_agent = {}
118 |         for agent, radio in self._radio_from_agent.items():
119 |             radio_from_agent.update({agent.name: radio})
120 |         ret['radio_from_agent'] = json.dumps(
121 |             radio_from_agent, cls=json_encoder)
122 | 
123 |         return ret
124 | 
125 |     def set_json_info(self):
126 |         super().set_json_info()
127 |         self.radio_vocab_size = json.loads(
128 |             self._init_game_state['radio_vocab_size'])
129 |         self.radio_num_words = json.loads(
130 |             self._init_game_state['radio_num_words'])
131 | 
132 |         # json dict to enum
133 |         radio_from_agent = json.loads(self._init_game_state['radio_from_agent'])
134 |         for agent, radio in radio_from_agent.items():
135 |             self._radio_from_agent.update({constants.Item[agent]: radio})
136 | 


--------------------------------------------------------------------------------
/pommerman/helpers/__init__.py:
--------------------------------------------------------------------------------
 1 | ''' Helpers'''
 2 | import os
 3 | from .. import agents
 4 | 
 5 | USE_GAME_SERVERS = os.getenv("PLAYGROUND_USE_GAME_SERVERS")
 6 | GAME_SERVERS = {id_: os.getenv("PLAYGROUND_GAME_INSTANCE_%d" % id_)
 7 |                 for id_ in range(4)}
 8 | 
 9 | 
10 | # NOTE: This routine is meant for internal usage.
11 | def make_agent_from_string(agent_string, agent_id, docker_env_dict=None):
12 |     '''Internal helper for building an agent instance'''
13 |     
14 |     agent_type, agent_control = agent_string.split("::")
15 | 
16 |     assert agent_type in ["player", "playerblock", "simple", "random", "docker", "http" , "test", "tensorforce"]
17 | 
18 |     agent_instance = None
19 | 
20 |     if agent_type == "player":
21 |         agent_instance = agents.PlayerAgent(agent_control=agent_control)
22 |     elif agent_type == "playerblock":
23 |         agent_instance = agents.PlayerAgentBlocking(agent_control=agent_control)
24 |     elif agent_type == "simple":
25 |         agent_instance = agents.SimpleAgent()
26 |     elif agent_type == "random":
27 |         agent_instance = agents.RandomAgent()
28 |     elif agent_type == "docker":
29 |         port = agent_id + 1000
30 |         if not USE_GAME_SERVERS:
31 |             server = 'http://localhost'
32 |         else:
33 |             server = GAME_SERVERS[agent_id]
34 |         assert port is not None
35 |         agent_instance = agents.DockerAgent(
36 |             agent_control, port=port, server=server, env_vars=docker_env_dict)
37 |     elif agent_type == "http":
38 |         host, port = agent_control.split(":")
39 |         agent_instance = agents.HttpAgent(port=port, host=host)
40 |     elif agent_type == "test":
41 |         agent_instance = eval(agent_control)()
42 |     elif agent_type == "tensorforce":
43 |         agent_instance = agents.TensorForceAgent(algorithm=agent_control)
44 | 
45 |     return agent_instance
46 | 


--------------------------------------------------------------------------------
/pommerman/network/README.md:
--------------------------------------------------------------------------------
 1 | ## IonPlayer (Pommerman network module)
 2 | ## Running:
 3 | Run the command `ion_client` for running the client and `ion_server` for running the server
 4 | ## How does this work:
 5 | ```
 6 | Match-making pseudo-code:
 7 | Client:
 8 | 1. Run wrapper on client which handles network + environment
 9 | 2. Connect to Server and send a "match" request
10 | Server:
11 | 1. Receive match request and look for other users who have sent a match request as well
12 | 2. If amount of players is equal to 4 and amount of matches running in parallel aren't more than a specified amount then send an "ready" request to the 4 players and wait
13 | Client:
14 | 3. Respond to "ready" request with another "ready"
15 | Server:
16 | 3A. If ready was not received from a user: Remove user from active players list and go back to step 1 (Look for another pair)
17 | 3B. If ready was received from everyone: Delegate a process to that match
18 | ```
19 | ```
20 | Match-processing loop pseudo-code:
21 | Server:
22 | 1. Send observation to all players with timeout
23 | Client:
24 | 1. Send single action integer to server
25 | Server:
26 | 2A. If action was received within timeout then parse it
27 | 2B. If action wasn't received or was received after timeout then issue a STOP action
28 | ```
29 | ```
30 | Security considerations:
31 | 1. Isolated channels must be kept for each and every player as to prevent cheating by reading other messages on a single channel
32 | 2. In addition to 1 everything should also work on a single port
33 | Both of these can be easily handled using WebSocket (https://en.wikipedia.org/wiki/WebSocket)
34 | ```
35 | ## The network code originated from the following repositories:
36 | * ionclient - https://github.com/PixelyIon/ionplayer-client
37 | * ionserver - https://github.com/PixelyIon/ionplayer-server


--------------------------------------------------------------------------------
/pommerman/network/__init__.py:
--------------------------------------------------------------------------------
1 | """Import the network modules"""
2 | from . import client
3 | from . import server
4 | 


--------------------------------------------------------------------------------
/pommerman/network/client/__init__.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """IonClient Entry Point  
  3 | 
  4 | This library allows playing of matches on network via the WebSockets protocol.  
  5 | Functions:  
  6 | init() - If you want to run the application normally  
  7 | intent(network) - If you want to pass a pre-existing network object but want to
  8 | prompt the user about what they want to do  
  9 | match(network, room=False, agent=False, ui_en=False) - If you want 
 10 | to start a match directly  
 11 | replay(network, id=False, ui_en=False) - If you want to start a replay directly  
 12 | """
 13 | 
 14 | import ui
 15 | from . import constants
 16 | from .network import Network
 17 | import signal
 18 | import sys
 19 | import os
 20 | import pommerman
 21 | import importlib
 22 | import gym
 23 | import numpy
 24 | 
 25 | 
 26 | def _exit_handler(_s=None, _h=None):
 27 |     """Arguments:  
 28 |     * _s: Unused argument  
 29 |     * _h: Unused argument  
 30 |     Description: Handle exiting the application"""
 31 |     ui.info(ui.yellow, "Exiting..")
 32 |     exit(0)
 33 | 
 34 | 
 35 | def init():
 36 |     """Description: Starts up the application normally by asking the user about
 37 |     the server they want to connect to"""
 38 |     if ui.ask_yes_no(constants.Strings.server_prompt.value):
 39 |         domain = ui.ask_string(constants.Strings.server_ip.value)
 40 |         if domain is None:
 41 |             ui.fatal(constants.Exceptions.invalid_ip.value)
 42 |     else:
 43 |         domain = "play.pommerman.com:5050"
 44 |     ui.info(
 45 |         constants.Strings.server_connecting_p1.value,
 46 |         ui.yellow,
 47 |         constants.Strings.server_connecting_p2.value,
 48 |         ui.reset,
 49 |         constants.Strings.server_connecting_p3.value,
 50 |     )
 51 |     network = Network(domain)
 52 |     try:
 53 |         status = network.server_status()
 54 |     except Exception as e:
 55 |         ui.fatal(e)
 56 |     signal.signal(signal.SIGINT, _exit_handler)
 57 |     ui.info(
 58 |         constants.Strings.server_connected.value,
 59 |         ui.yellow,
 60 |         constants.Strings.server_players.value,
 61 |         str(status[0]) + ",",
 62 |         constants.Strings.server_matches.value,
 63 |         status[1],
 64 |     )
 65 |     intent(network)
 66 | 
 67 | 
 68 | def _agent_prompt():
 69 |     """Description: Prompt the user to import their agent"""
 70 |     sys.path.append(os.getcwd())
 71 |     agent = importlib.import_module(ui.ask_string(constants.Strings.match_import.value))
 72 |     agent_class = ui.ask_string(constants.Strings.match_class_name.value)
 73 |     if agent_class not in agent.__dir__():
 74 |         ui.fatal(constants.Strings.error_invalid_class.value)
 75 |     agent = getattr(agent, agent_class)
 76 |     if getattr(agent, "act"):
 77 |         ui.info(ui.green, constants.Strings.match_agent_success.value)
 78 |     return agent
 79 | 
 80 | 
 81 | def intent(network):
 82 |     """Description: This creates a prompt for the user where they can choose to:  
 83 |     * Play a match  
 84 |     * Create/Join a room  
 85 |     * Replay a match  
 86 |     * Exit the application  
 87 |     Arguments:   
 88 |     * network: An `network`(pommerman.network.ion_client.network) object  
 89 |     """
 90 |     i = ui.ask_choice(
 91 |         constants.Strings.intent.value,
 92 |         [
 93 |             constants.Strings.intent_match.value,
 94 |             constants.Strings.intent_room.value,
 95 |             constants.Strings.intent_replay.value,
 96 |             constants.Strings.intent_exit.value,
 97 |         ],
 98 |     )
 99 |     if i == constants.Strings.intent_match.value:
100 |         agent = _agent_prompt()
101 |         match(network, agent=agent, ui_en=True)
102 |     elif i == constants.Strings.intent_room.value:
103 |         room = str(ui.ask_string(constants.Strings.room_code.value))
104 |         agent = _agent_prompt()
105 |         match(network, room=room, agent=agent, ui_en=True)
106 |     elif i == constants.Strings.intent_replay.value:
107 |         replay(network, ui_en=True)
108 |     elif i == constants.Strings.intent_exit.value:
109 |         exit(0)
110 | 
111 | 
112 | def match(network, room=False, agent=False, ui_en=False):
113 |     """Description: This facilitates playing a match  
114 |     Arguments:  
115 |     * network: An `network`(pommerman.network.ion_client.network) object  
116 |     * room: If String, The room to be created/joined. If False, the public \
117 | room will be joined  
118 |     * agent: The class of the agent should be a derivative of BaseAgent  
119 |     * ui_en: If the UI is enabled or disabled (This also controls if exception \
120 | are raised or not)
121 |     Returns: Array [reward, match_id]"""
122 |     agent = agent()
123 |     if ui_en:
124 |         ui.info(ui.yellow, constants.Strings.server_comm.value)
125 |     try:
126 |         network.join_list(room)
127 |     except Exception as e:
128 |         if ui_en:
129 |             ui.fatal(e)
130 |         raise e
131 |     if ui_en:
132 |         ui.info(constants.Strings.match_variant.value, ui.yellow, network.mode)
133 |         ui.info(ui.yellow, constants.Strings.match_wait.value)
134 |     try:
135 |         network.wait_match()
136 |     except Exception as e:
137 |         if ui_en:
138 |             ui.fatal(e)
139 |         raise e
140 |     if ui_en:
141 |         ui.info(constants.Strings.match_run.value, "#" + network.match_id)
142 |     for mode in pommerman.constants.GameType:
143 |         if mode.name in network.mode:
144 |             agent.init_agent(
145 |                 id=0, game_type=mode
146 |             )  # We always use ID as 0 as the server doesn't return it
147 |     while True:
148 |         try:
149 |             match_obj = network.match_get()
150 |         except Exception as e:
151 |             if ui_en:
152 |                 ui.fatal(e)
153 |             raise e
154 |         # match_obj[0] is the intent: 0 = OBS, 1 = Agent Dead, 2 = Match End
155 |         if match_obj[0] is 0:
156 |             action = agent.act(match_obj[1], gym.spaces.Discrete(6))
157 |             try:
158 |                 network.send_move(action, match_obj[2])
159 |             except Exception as e:
160 |                 if ui_en:
161 |                     ui.fatal(e)
162 |                 raise e
163 |         elif match_obj[0] is 2:
164 |             agent.episode_end(reward=match_obj[1])
165 |             if ui_en:
166 |                 if match_obj[1] == 1:
167 |                     ui.info(constants.Strings.match_won.value)
168 |                 if match_obj[1] == -1:
169 |                     ui.info(constants.Strings.match_loss_draw.value)
170 |                 ui.info(
171 |                     constants.Strings.match_agent.value,
172 |                     ui.yellow,
173 |                     pommerman.constants.Item(match_obj[2]).name,
174 |                 )
175 |             else:
176 |                 return [match_obj[1], network.match_id]
177 |             break
178 |     ui.info(constants.Strings.match_replay.value, ui.yellow, network.match_id)
179 |     if ui.ask_yes_no(constants.Strings.match_ask_replay.value):
180 |         replay(network, network.match_id)
181 |     else:
182 |         intent(network)
183 | 
184 | 
185 | def replay(network, id=False, ui_en=False):
186 |     """Description: This replays a particular match  
187 |     Arguments:  
188 |     * network: An `network`(pommerman.network.ion_client.network) object  
189 |     * id: The ID of a match to be played. If False, the user is prompted about \
190 | it.  
191 |     * ui_en: If the UI is enabled or disabled (This also controls if exception are\
192 | raised or not)"""
193 |     if not id and ui_en:
194 |         id = ui.ask_string(constants.Strings.replay_prompt.value)
195 |         if id is None:
196 |             ui.fatal(constants.Strings.error_invalid_id.value)
197 |         id = str(id)
198 |     if id[0] == "#":
199 |         id = id[1:]
200 |     ui.info(
201 |         constants.Strings.server_replay_p1.value,
202 |         ui.yellow,
203 |         "#" + str(id),
204 |         ui.reset,
205 |         constants.Strings.server_replay_p2.value,
206 |     )
207 |     try:
208 |         replay_obj = network.get_replay(id)
209 |     except Exception as e:
210 |         if ui_en:
211 |             ui.fatal(e)
212 |         raise e
213 |     if ui_en:
214 |         ui.info(constants.Strings.replay_start.value, ui.yellow, "#" + str(id))
215 |     env = pommerman.make(
216 |         replay_obj["mode"],
217 |         [
218 |             pommerman.agents.BaseAgent(),
219 |             pommerman.agents.BaseAgent(),
220 |             pommerman.agents.BaseAgent(),
221 |             pommerman.agents.BaseAgent(),
222 |         ],
223 |     )
224 |     env.reset()
225 |     env._board = numpy.array(replay_obj["board"])
226 |     # Note: Render FPS is set to 30 as it'll be smoother
227 |     env._render_fps = 30
228 |     for i in replay_obj["actions"]:
229 |         env.render()
230 |         reward, done = env.step(i)[1:3]
231 |         if done:
232 |             break
233 |     if reward != replay_obj["reward"]:
234 |         if ui_en:
235 |             ui.info(ui.yellow, constants.Exceptions.replay_no_reward.value)
236 |         else:
237 |             raise Exception(constants.Exceptions.replay_no_reward.value)
238 |     env.close()
239 |     if ui_en:
240 |         ui.info(ui.yellow, constants.Strings.replay_end.value)
241 |     intent(network)
242 | 
243 | 
244 | if __name__ == "__main__":
245 |     init()
246 | 


--------------------------------------------------------------------------------
/pommerman/network/client/constants.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """This holds all of the constants used by ion_client"""
 3 | 
 4 | import enum
 5 | 
 6 | 
 7 | class Strings(enum.Enum):
 8 |     """Define all the strings"""
 9 |     server_prompt = "Connect to custom server ?"
10 |     server_ip = "Enter IP of server to connect to:"
11 |     server_connecting_p1 = "Connecting to"
12 |     server_connecting_p2 = "IonPlayer"
13 |     server_connecting_p3 = "servers"
14 |     server_connected = "Connected to server:"
15 |     server_players = "Concurrent players:"
16 |     server_matches = "Concurrent matches:"
17 |     server_comm = "Communicating with server ✈"
18 |     server_replay_p1 = "Retrieving match"
19 |     server_replay_p2 = "from server"
20 |     intent = "What do you want to do ?"
21 |     intent_match = "Join the public room"
22 |     intent_room = "Join a room"
23 |     intent_replay = "Look at a replay"
24 |     intent_exit = "Quit the application"
25 |     room_code = "Enter the name of the room you want to join/create"
26 |     match_import = "Input the module which has your agent's class. EG: pommerman.agents"
27 |     match_class_name = "Input the name of your agent's class (So class.act() is valid). EG: SimpleAgent"
28 |     match_agent_success = "The agent was successfully imported"
29 |     match_variant = "The variant of Pommerman used:"
30 |     match_wait = "Waiting for Server to allocate players to match.."
31 |     match_run = "Running match.."
32 |     match_won = "Your agent has won!"
33 |     match_loss_draw = "Your agent has either lost or there was a draw"
34 |     match_replay = "You can now view the replay using the match ID:"
35 |     match_ask_replay = "Do you want to replay the current match ?"
36 |     match_agent = "Your agent was"
37 |     replay_prompt = "Input the ID of the match ?"
38 |     replay_start = "Replaying match.."
39 |     replay_nomatch = "The reward of the replay doesn't match that of the game ?"
40 |     replay_end = "The replay is over."
41 |     error_no_recv = "The observation wasn't successfully retrieved from the server"
42 | 
43 | 
44 | class NetworkCommands(enum.Enum):
45 |     """Define all the network commands"""
46 |     check = 0
47 |     ping = 1
48 |     match = 2
49 |     room = 3
50 |     match_start = 4
51 |     match_act = 5
52 |     match_end = 6
53 |     replay = 7
54 |     status_ok = 10
55 |     status_fail = 11
56 |     status_full = 12
57 |     status_reg = 13
58 | 
59 | 
60 | class Exceptions(enum.Enum):
61 |     """Define all the exceptions"""
62 |     net_connect_fail = "Couldn't connect to the server"
63 |     net_respond_fail = "The server didn't respond correctly"
64 |     net_invalid_response = "The server sent an invalid response"
65 |     net_server_full = "The server is full"
66 |     net_server_closed = "The connection to the server was closed"
67 |     match_full = "The maximum amount of concurrent matches on the has been exceeded"
68 |     room_full = "The room is full"
69 |     replay_notfound = "Couldn't find replay on server"
70 |     replay_no_reward = "The current reward doesn't match the expected reward"
71 |     invalid_ip = "The provided IP is invalid"
72 |     invalid_id = "The provided match ID is invalid"
73 |     invalid_room = "The provided room name is invalid"
74 |     invalid_class = "Cannot find class in module file"
75 | 


--------------------------------------------------------------------------------
/pommerman/network/client/network.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """IonClient Network Manager
  3 | 
  4 | This file contains the Network class that can be used directly. It is also
  5 | thread-safe so feel free to run multiple network clients"""
  6 | 
  7 | import pommerman
  8 | import websocket
  9 | from . import constants
 10 | import rapidjson
 11 | import threading
 12 | import gzip
 13 | import numpy
 14 | 
 15 | 
 16 | class Network(object):
 17 |     """This class is responsible for handling communication b/w Client
 18 |     and Server"""
 19 | 
 20 |     def __init__(self, ip):
 21 |         """Arguments:  
 22 |         * ip: The IP of the server"""
 23 |         self.ws_ = websocket.create_connection(
 24 |             "ws://" + str(ip))
 25 |         self.lock = threading.Lock()
 26 | 
 27 |     def server_status(self):
 28 |         """Description: Retrieves the status of the server"""
 29 |         self._send(intent=constants.NetworkCommands.check.value)
 30 |         message_recieved = self._recieve()
 31 |         if message_recieved[
 32 |                 "intent"] == constants.NetworkCommands.status_ok.value:
 33 |             return [message_recieved["players"], message_recieved["matches"]]
 34 |         elif message_recieved[
 35 |                 "intent"] == constants.NetworkCommands.status_full.value:
 36 |             raise Exception(constants.Exceptions.net_server_full.value)
 37 |         else:
 38 |             raise Exception(constants.Exceptions.net_respond_fail.value)
 39 | 
 40 |     def join_list(self, room=False):
 41 |         """Description: Check if the server actually responds  
 42 |         Arguments:  
 43 |         * room: The room to be created/joined. If False, the public room will \
 44 | be joined, it should be a String"""
 45 |         if not room:
 46 |             self._send(intent=constants.NetworkCommands.match.value)
 47 |         else:
 48 |             self._send(
 49 |                 intent=constants.NetworkCommands.room.value, room=str(room))
 50 |         message_recieved = self._recieve()
 51 |         if message_recieved[
 52 |                 "intent"] == constants.NetworkCommands.status_full.value:
 53 |             if room:
 54 |                 raise Exception(constants.Exceptions.room_full.value)
 55 |             else:
 56 |                 raise Exception(constants.Exceptions.match_full.value)
 57 |         self.id = message_recieved["player_id"]
 58 |         self.mode = message_recieved["mode"]
 59 | 
 60 |     def wait_match(self):
 61 |         """Description: Wait for a response from the server regarding a match 
 62 |         request"""
 63 |         message_recieved = self._recieve()
 64 |         if message_recieved[
 65 |                 "intent"] == constants.NetworkCommands.match_start.value:
 66 |             self.match_id = message_recieved["match_id"]
 67 | 
 68 |     def match_get(self):
 69 |         """Description: Get the next step of the match  
 70 |         Return values(Format: "0th element - Meaning - Other elements"):
 71 |         * 0 - Agent is still alive - OBS and Turn ID
 72 |         * 1 - Agent is dead - Nothing
 73 |         * 2 - Match has ended - Reward and agent item ID correlating to \
 74 | pommerman.constants.Items
 75 |         """
 76 |         self.lock.acquire()
 77 |         try:
 78 |             message_recieved = self.ws_.recv()
 79 |         except:
 80 |             raise Exception(constants.Exceptions.net_respond_fail.value)
 81 |         finally:
 82 |             self.lock.release()
 83 |         try:
 84 |             # Messages with normal match data are compressed using GZIP while
 85 |             # match end notifications aren't. So we move on to that if this
 86 |             # fails to decompress with gzip.
 87 |             message_decoded = rapidjson.loads(
 88 |                 str(gzip.decompress(message_recieved), "utf-8"))
 89 |         except:
 90 |             try:
 91 |                 message_decoded = rapidjson.loads(message_recieved)
 92 |                 if message_decoded[
 93 |                         "intent"] == constants.NetworkCommands.match_end.value:
 94 |                     return [
 95 |                         2,
 96 |                         int(message_decoded["reward"]),
 97 |                         int(message_decoded["agent"])
 98 |                     ]
 99 |             except:
100 |                 raise Exception(constants.Exceptions.net_invalid_response.value)
101 |         # Info: message_decoded - ["d"]=Dead, ["o"]=OBS, ["i"] = Turn ID
102 |         if message_decoded["d"]:
103 |             return [1]
104 |         obs = message_decoded["o"]
105 |         obs["teammate"] = pommerman.constants.Item[obs["teammate"]]
106 |         # Note: If position is not tuple SimpleAgent *will* error out
107 |         obs["position"] = tuple(obs["position"])
108 |         for x, y in enumerate(obs["enemies"]):
109 |             obs["enemies"][x] = pommerman.constants.Item[y]
110 |         for i in ["board", "bomb_life", "bomb_blast_strength"]:
111 |             obs[i] = numpy.asarray(obs[i])
112 |         return [0, obs, message_decoded["i"]]
113 | 
114 |     def send_move(self, action, turn_id):
115 |         """Description: Send the action to the server for playing out  
116 |         Arguments:  
117 |         * action: The action that has to be sent  
118 |         * turn_id: The ID of the step taken by the server (To sync up the \
119 | action of the agent and server)"""
120 |         self._send(
121 |             intent=constants.NetworkCommands.match_act.value,
122 |             player_id=self.id,
123 |             act=action,
124 |             match_id=self.match_id,
125 |             turn_id=turn_id)
126 | 
127 |     def get_replay(self, id):
128 |         """Description: Send the action to the server for playing out  
129 |         Arguments:  
130 |         * id: The ID of the match to be replayed"""
131 |         self._send(intent=constants.NetworkCommands.replay.value, replay_id=id)
132 |         try:
133 |             status, replay = rapidjson.loads(
134 |                 str(gzip.decompress(self.ws_.recv()), "utf-8"))
135 |         except ValueError:
136 |             raise Exception(constants.Exceptions.replay_notfound.value)
137 |         if status == constants.NetworkCommands.status_ok.value:
138 |             return replay
139 |         else:
140 |             raise Exception(constants.Exceptions.replay_notfound.value)
141 | 
142 |     def _send(self, **kwargs):
143 |         self.lock.acquire()
144 |         try:
145 |             self.ws_.send(rapidjson.dumps(kwargs))
146 |         except:
147 |             raise Exception(constants.Exceptions.net_server_closed.value)
148 |         finally:
149 |             self.lock.release()
150 | 
151 |     def _recieve(self):
152 |         self.lock.acquire()
153 |         try:
154 |             message_recieved = self.ws_.recv()
155 |         except:
156 |             if not self.ws_.connected:
157 |                 raise Exception(constants.Exceptions.net_server_closed.value)
158 |             else:
159 |                 raise Exception(constants.Exceptions.net_invalid_response.value)
160 |         finally:
161 |             self.lock.release()
162 |         try:
163 |             return rapidjson.loads(message_recieved)
164 |         except:
165 |             raise Exception(constants.Exceptions.net_invalid_response.value)


--------------------------------------------------------------------------------
/pommerman/network/server/__init__.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """IonServer - This library allows playing of matches on network via the
  3 | WebSockets protocol.  
  4 | Functions:  
  5 | init() - If you want to run the application normally  
  6 | run(max_players, max_matches, port, timeout, mode, ui_en=True, 
  7 | exit_handler=True) - If you want to programatically launch the server with
  8 | predefined parameters"""
  9 | 
 10 | import ui
 11 | import multiprocessing
 12 | from . import constants
 13 | from . import network
 14 | from . import match
 15 | import time
 16 | import random
 17 | import signal
 18 | import pommerman
 19 | 
 20 | MATCH_SUBPROCESS = []
 21 | 
 22 | 
 23 | def _exit_handler(subprocess_net):
 24 |     """Description: Return the exit handler with a reference to the subprocess_net
 25 |     variable."""
 26 | 
 27 |     def exit_handler(_s, _h):
 28 |         """Description: Handle exiting the application."""
 29 |         ui.info(ui.yellow, "Exiting..")
 30 |         subprocess_net.terminate()
 31 |         for i in MATCH_SUBPROCESS:
 32 |             i.terminate()
 33 |         exit(0)
 34 | 
 35 |     return exit_handler
 36 | 
 37 | 
 38 | def init():
 39 |     """Description: Initiate the application by asking questions."""
 40 |     ui.info(ui.yellow, constants.Strings.sever_starting.value)
 41 |     port = int(ui.ask_string(constants.Strings.server_port.value))
 42 |     max_players = int(ui.ask_string(constants.Strings.server_maxp.value))
 43 |     if max_players < 4:
 44 |         # If the maximum players allowed on the server is less than 4
 45 |         # which is the minimum required for a pommerman match then
 46 |         # notify the user about that and quit.
 47 |         ui.fatal(ui.yellow, constants.Strings.server_playercount_too_low.value)
 48 |     modes = []
 49 |     for i in pommerman.configs.__dir__():
 50 |         if i[-4:] == "_env":
 51 |             id = getattr(pommerman.configs, i)()["env_id"]
 52 |             if id[-2:] != "v2":
 53 |                 modes.append(id)
 54 |     timeout = float(ui.ask_string(constants.Strings.server_timeout.value))
 55 |     mode = str(ui.ask_choice(constants.Strings.server_mode.value, modes))
 56 |     run(port, max_players, timeout, mode, ui_en=True, exit_handler=True)
 57 | 
 58 | 
 59 | def run(port,
 60 |         max_players,
 61 |         timeout,
 62 |         mode,
 63 |         max_matches=False,
 64 |         ui_en=False,
 65 |         exit_handler=False):
 66 |     """Description: This function is responsible for running the server.  
 67 |     Arguments:  
 68 |     * port: The port used by the server  
 69 |     * max_players: The maximum amount of concurrent players  
 70 |     * timeout: (In Seconds) The time to wait before issuing the STOP action  
 71 |     * mode: The flavor of pommerman  
 72 |     * max_matches: The maximum amount of concurrent matches (If not defined this \
 73 | is set to int(max_players/4))
 74 |     * ui_en: If True, UI is enabled else UI is disabled  
 75 |     * exit_handler: If True, the exit handler is set else the exit handler \
 76 | isn't set"""
 77 |     netpipe, rnetpipe = multiprocessing.Pipe()
 78 |     netqueue = multiprocessing.Queue()
 79 |     subprocess_net = multiprocessing.Process(
 80 |         target=network.thread,
 81 |         args=(rnetpipe, netqueue, port, max_players, mode, timeout), daemon=True)
 82 |     subprocess_net.start()
 83 |     if not max_matches:
 84 |         max_matches = int(max_players / 4)
 85 |     if exit_handler:
 86 |         signal.signal(signal.SIGINT, _exit_handler(subprocess_net))
 87 |     if ui_en:
 88 |         ui.info(ui.yellow, constants.Strings.server_ready.value, ui.white,
 89 |                 ui.Symbol("✔", ":)"))
 90 |     while True:
 91 |         netpipe.send([constants.SubprocessCommands.get_players.value])
 92 |         concurrent_list, num_players, num_matches = netpipe.recv()
 93 |         if int(num_matches) < max_matches:
 94 |             for x in list(concurrent_list["room"].keys()):
 95 |                 i = concurrent_list["room"][x]
 96 |                 if len(i) >= 4:
 97 |                     MATCH_SUBPROCESS.append(_create_match(i, netqueue, mode))
 98 |                     del concurrent_list["room"][x]
 99 |             if len(concurrent_list["noroom"]) >= 4:
100 |                 e = random.sample(concurrent_list["noroom"],
101 |                                   (int(len(concurrent_list["noroom"]) / 4) * 4))
102 |                 for group in range(int(len(concurrent_list["noroom"]) / 4)):
103 |                     MATCH_SUBPROCESS.append(
104 |                         _create_match(e[group * 4:(group + 1) * 4], netqueue,
105 |                                       mode))
106 |                     for player in e[group * 4:(group + 1) * 4]:
107 |                         del concurrent_list["noroom"][concurrent_list["noroom"]
108 |                                                       .index(player)]
109 |             netpipe.send(
110 |                 [constants.SubprocessCommands.update_cc.value, concurrent_list])
111 |         if ui_en:
112 |             ui.info(
113 |                 "\033[2K\r",
114 |                 ui.white,
115 |                 constants.Strings.server_players.value,
116 |                 ui.yellow,
117 |                 "[",
118 |                 num_players,
119 |                 "/",
120 |                 max_players,
121 |                 "]",
122 |                 ui.white,
123 |                 constants.Strings.server_matches.value,
124 |                 ui.yellow,
125 |                 "[",
126 |                 num_matches,
127 |                 "/",
128 |                 max_matches,
129 |                 "]",
130 |                 end="")
131 |         for process in tuple(MATCH_SUBPROCESS):
132 |             if not process.is_alive():
133 |                 MATCH_SUBPROCESS.remove(process)
134 |         time.sleep(2)
135 | 
136 | 
137 | def _create_match(players, queue_subproc, mode):
138 |     """Description: This function is responsible for creating a match"""
139 |     subprocess = multiprocessing.Process(
140 |         target=match.thread, args=(players, queue_subproc, mode), daemon=True)
141 |     subprocess.start()
142 |     return subprocess
143 | 
144 | 
145 | if __name__ == "__main__":
146 |     multiprocessing.freeze_support()
147 |     init()
148 | 


--------------------------------------------------------------------------------
/pommerman/network/server/constants.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """This holds all of the constants used by ion_server"""
 3 | 
 4 | import enum
 5 | 
 6 | 
 7 | class Strings(enum.Enum):
 8 |     """Define all the strings"""
 9 |     sever_starting = "Server is being initiated.."
10 |     server_ready = "Server is ready"
11 |     server_players = "Concurrent players"
12 |     server_matches = "Concurrent matches"
13 |     server_exit_prompt = "Are you sure you want to stop the server ?"
14 |     server_exit = "The server is quitting.."
15 |     server_port = "What port would you want the server to start on ?"
16 |     server_maxp = "What's the maximum amount of players that can be concurrently connected to the server ?"
17 |     server_playercount_too_low = "Input a number greater than 4"
18 |     server_timeout = "What's the timeout for the player response (In seconds) ?"
19 |     server_mode = "Which variant of Pommerman would you like the Server to run ?"
20 | 
21 | 
22 | class SubprocessCommands(enum.Enum):
23 |     """Define all the sub-process commands"""
24 |     get_players = 0
25 |     update_cc = 1
26 |     match_next = 2
27 |     player_drop = 3
28 |     match_end = 4
29 | 
30 | 
31 | class NetworkCommands(enum.Enum):
32 |     """Define all the network commands"""
33 |     check = 0
34 |     ping = 1
35 |     match = 2
36 |     room = 3
37 |     match_start = 4
38 |     match_act = 5
39 |     match_end = 6
40 |     replay = 7
41 |     status_ok = 10
42 |     status_fail = 11
43 |     status_full = 12
44 |     status_reg = 13
45 | 


--------------------------------------------------------------------------------
/pommerman/network/server/match.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """IonServer Match handler
  3 | 
  4 | This contains functions responsible for playing matches 
  5 | (You shouldn't use this file directly due to the very specialized 
  6 | interactions required for it to function in addition to parameters 
  7 | i.e: Pipes, Queues)"""
  8 | 
  9 | import multiprocessing
 10 | from . import constants
 11 | import uuid
 12 | import os
 13 | import rapidjson
 14 | import gzip
 15 | import enum
 16 | import pommerman
 17 | import numpy
 18 | 
 19 | 
 20 | def unique_uuid(dir):
 21 |     """Generates a unique UUID and checks for collision with files within the
 22 |     specified directory (So we don't override a pre-existing file)"""
 23 |     try:
 24 |         ls_dir = os.listdir(dir)
 25 |     except FileNotFoundError:
 26 |         os.makedirs(dir)
 27 |         ls_dir = []
 28 |     uuid_ = str(uuid.uuid4())[:10]
 29 |     while uuid_ + ".json" in ls_dir:
 30 |         uuid_ = str(uuid.uuid4())[:10]
 31 |     return uuid_
 32 | 
 33 | 
 34 | def resolve_classes(i):
 35 |     """Resolves observation into JSONable types by looping over every element
 36 |     in it"""
 37 |     if isinstance(i, tuple):
 38 |         i = list(i)
 39 |     for key, value in enumerate(i):
 40 |         if isinstance(i, dict):
 41 |             key = value
 42 |             value = i[key]
 43 |         if hasattr(value, '__iter__') and not isinstance(
 44 |                 i[key], str) and not isinstance(i[key], numpy.ndarray):
 45 |             i[key] = resolve_classes(value)
 46 |         elif isinstance(value, enum.Enum):
 47 |             i[key] = str(value.name)
 48 |         elif isinstance(value, numpy.ndarray):
 49 |             i[key] = value.tolist()
 50 |         elif isinstance(value, numpy.uint8) or isinstance(value, numpy.int64):
 51 |             i[key] = int(value)
 52 |     return i
 53 | 
 54 | 
 55 | def thread(players, queue_subproc, mode):
 56 |     """Handles running of the match loop"""
 57 |     uuid_ = unique_uuid("matches")
 58 |     base_agent = pommerman.agents.BaseAgent
 59 |     env = pommerman.make(
 60 |         mode,
 61 |         [base_agent(), base_agent(),
 62 |          base_agent(), base_agent()])
 63 |     net, net_end = multiprocessing.Pipe()
 64 |     queue_subproc.put([net_end, players, uuid_])
 65 |     obs = env.reset()
 66 |     record = {
 67 |         "board": numpy.array(env._board, copy=True).tolist(),
 68 |         "actions": [],
 69 |         "mode": str(mode)
 70 |     }
 71 |     done = False
 72 |     while not done:
 73 |         obs_res = resolve_classes(obs.copy())
 74 |         turn_id = str(uuid.uuid4())[:5]
 75 |         try:
 76 |             obs_bytes = []
 77 |             for key, value in enumerate(obs_res):
 78 |                 if 10 + key in obs[0]["alive"]:
 79 |                     obs_bytes.append(
 80 |                         gzip.compress(
 81 |                             bytes(
 82 |                                 rapidjson.dumps({
 83 |                                     "o": value,  # o = obs
 84 |                                     "i": turn_id,  # i = Turn ID
 85 |                                     "d": False  # d = Dead
 86 |                                 }),
 87 |                                 "utf8")))
 88 |                 else:
 89 |                     obs_bytes.append(
 90 |                         gzip.compress(
 91 |                             bytes(
 92 |                                 rapidjson.dumps({
 93 |                                     "d": True  # d = Dead
 94 |                                 }),
 95 |                                 "utf8")))
 96 |             net.send([
 97 |                 constants.SubprocessCommands.match_next.value, turn_id,
 98 |                 obs_bytes,
 99 |                 len(obs[0]["alive"])
100 |             ])
101 |             act = net.recv()
102 |         except:
103 |             act = [0, 0, 0, 0]
104 |         record["actions"].append(numpy.array(act, copy=True).tolist())
105 |         obs, rew, done = env.step(act)[:3]
106 |     record["reward"] = rew
107 |     env.close()
108 |     with open("./matches/" + uuid_ + ".json", "w") as file:
109 |         rapidjson.dump(record, file)
110 |     net.send([constants.SubprocessCommands.match_end.value, rew])
111 |     net.recv()
112 |     exit(0)
113 | 


--------------------------------------------------------------------------------
/pommerman/network/server/network.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """IonServer Network handler
  3 | 
  4 | This contains functions responsible Server-Client communication 
  5 | (You shouldn't use this file directly due to the very specialized 
  6 | interactions required for it to function in addition to parameters 
  7 | i.e: Pipes, Queues. This is the reason for the simple docstrings
  8 | for functions)"""
  9 | 
 10 | import asyncio
 11 | import websockets
 12 | import threading
 13 | import time
 14 | from . import constants
 15 | import os
 16 | import re
 17 | import gzip
 18 | import rapidjson
 19 | import uuid
 20 | 
 21 | CONCURRENTLY_LOOKING = {
 22 |     "room": {},
 23 |     "noroom": []
 24 | }  # This holds the IDs of players concurrently looking for a room
 25 | PLAYER_WS = {}  # This stores the mapping from player ID to the websocket object
 26 | MATCH_PROCESS = {}  # This holds pipes to match processes
 27 | MAX_PLAYERS = 0
 28 | PIPE_MAIN = False  # This holds the queue (Main-proc <-> Network-proc)
 29 | QUEUE_SUBPROC = False  # This holds the queue (Subproc <-> Network-proc)
 30 | MODE = ""
 31 | STOP_TIMEOUT = 0
 32 | 
 33 | 
 34 | async def message_parse(message, websocket):
 35 |     """Parse the messages recieved from the clients"""
 36 |     if message["intent"] is constants.NetworkCommands.check.value:
 37 |         await websocket.send(
 38 |             rapidjson.dumps({
 39 |                 "intent":
 40 |                 constants.NetworkCommands.status_ok.value,
 41 |                 "players":
 42 |                 len(PLAYER_WS),
 43 |                 "matches":
 44 |                 len(MATCH_PROCESS)
 45 |             }))
 46 |     elif message["intent"] is constants.NetworkCommands.match_act.value:
 47 |         if message["turn_id"] == MATCH_PROCESS[message["match_id"]]["turn_id"]:
 48 |             # Note: The statements below assign the action to the respective players
 49 |             MATCH_PROCESS[message["match_id"]]["act"][
 50 |                 MATCH_PROCESS[message["match_id"]]["players"].index(
 51 |                     message["player_id"])] = message["act"]
 52 |             MATCH_PROCESS[message["match_id"]]["recv"][MATCH_PROCESS[message[
 53 |                 "match_id"]]["players"].index(message["player_id"])] = True
 54 |     elif message["intent"] is constants.NetworkCommands.replay.value:
 55 |         try:
 56 |             with open(
 57 |                     os.path.join(
 58 |                         os.path.join(os.getcwd(), "matches"),
 59 |                         str(message["replay_id"]) + ".json"), 'r') as f:
 60 |                 # Note: Registry expression match comes after as it's an expensive operation as compared to file I/O
 61 |                 if re.fullmatch("^[a-z0-9-]*$",
 62 |                                 message["replay_id"]) is not None:
 63 |                     f = rapidjson.load(f)
 64 |                     await websocket.send(
 65 |                         gzip.compress(
 66 |                             bytes(
 67 |                                 rapidjson.dumps([
 68 |                                     constants.NetworkCommands.status_ok.value, f
 69 |                                 ]), "utf8")))
 70 |                 else:
 71 |                     await websocket.send(
 72 |                         gzip.compress(
 73 |                             bytes(
 74 |                                 rapidjson.dumps([
 75 |                                     constants.NetworkCommands.status_fail.value
 76 |                                 ]), "utf8")))
 77 |         except:
 78 |             await websocket.send(
 79 |                 gzip.compress(
 80 |                     bytes(
 81 |                         rapidjson.dumps(
 82 |                             [constants.NetworkCommands.status_fail.value]),
 83 |                         "utf8")))
 84 |     elif message["intent"] in [
 85 |             constants.NetworkCommands.match.value,
 86 |             constants.NetworkCommands.room.value
 87 |     ]:
 88 |         if len(PLAYER_WS) >= MAX_PLAYERS:
 89 |             await websocket.send(
 90 |                 rapidjson.dumps({
 91 |                     "intent":
 92 |                     constants.NetworkCommands.status_full.value
 93 |                 }))
 94 |             return
 95 |         uuid_ = str(uuid.uuid4())
 96 |         while uuid_ in PLAYER_WS:
 97 |             uuid_ = str(uuid.uuid4())
 98 |         PLAYER_WS[uuid_] = {"ws": websocket}
 99 |         if message["intent"] is constants.NetworkCommands.match.value:
100 |             CONCURRENTLY_LOOKING["noroom"].append(uuid_)
101 |             PLAYER_WS[uuid_]["noroom"] = True
102 |         elif message["intent"] is constants.NetworkCommands.room.value:
103 |             if message["room"] in CONCURRENTLY_LOOKING["room"]:
104 |                 if len(CONCURRENTLY_LOOKING["room"][message["room"]]) <= 4:
105 |                     CONCURRENTLY_LOOKING["room"][message["room"]].append(uuid_)
106 |                 else:
107 |                     await websocket.send(
108 |                         rapidjson.dumps({
109 |                             "intent":
110 |                             constants.NetworkCommands.status_full.value
111 |                         }))
112 |                     return
113 |             else:
114 |                 CONCURRENTLY_LOOKING["room"][message["room"]] = [uuid_]
115 |             PLAYER_WS[uuid_]["noroom"] = False
116 |             PLAYER_WS[uuid_]["room"] = str(message["room"])
117 |         await websocket.send(
118 |             rapidjson.dumps({
119 |                 "intent":
120 |                 constants.NetworkCommands.status_reg.value,
121 |                 "player_id":
122 |                 uuid_,
123 |                 "mode":
124 |                 MODE
125 |             }))
126 | 
127 | 
128 | async def ws_handler(websocket, pth=None):  # pylint: disable=unused-argument
129 |     """Handle the messages recieved by WebSocket (pth is not required but still\
130 | returned by the 'websockets' library)"""
131 |     try:
132 |         async for message in websocket:
133 |             try:
134 |                 await message_parse(rapidjson.loads(message), websocket)
135 |             except:
136 |                 pass
137 |     except websockets.exceptions.ConnectionClosed:
138 |         pass
139 | 
140 | 
141 | async def program_loop():
142 |     """Handles other network-related function"""
143 |     global CONCURRENTLY_LOOKING
144 |     while (True):
145 |         try:
146 |             for uuid_ in list(PLAYER_WS.keys()):
147 |                 i = PLAYER_WS[uuid_]
148 |                 if not i["ws"].open:
149 |                     if i["noroom"] is True:
150 |                         try:
151 |                             del CONCURRENTLY_LOOKING["noroom"][CONCURRENTLY_LOOKING[
152 |                                 "noroom"].index(uuid_)]
153 |                         except:
154 |                             pass
155 |                     elif i["noroom"] is False:
156 |                         try:
157 |                             del CONCURRENTLY_LOOKING["room"][i["room"]][
158 |                                 CONCURRENTLY_LOOKING["room"][i["room"]].index(
159 |                                     uuid_)]
160 |                         except:
161 |                             pass
162 |                     try:
163 |                         del PLAYER_WS[uuid_]
164 |                     except:
165 |                         pass
166 |             if PIPE_MAIN.poll():
167 |                 queue_msg = PIPE_MAIN.recv()
168 |                 if queue_msg[0] is constants.SubprocessCommands.get_players.value:
169 |                     PIPE_MAIN.send(
170 |                         [CONCURRENTLY_LOOKING,
171 |                          len(PLAYER_WS),
172 |                          len(MATCH_PROCESS)])
173 |                 elif queue_msg[0] is constants.SubprocessCommands.update_cc.value:
174 |                     CONCURRENTLY_LOOKING = queue_msg[1]
175 |             if not QUEUE_SUBPROC.empty():
176 |                 queue_msg = QUEUE_SUBPROC.get()
177 |                 MATCH_PROCESS[queue_msg[2]] = {
178 |                     "pipe": queue_msg[0],
179 |                     "players": queue_msg[1],
180 |                     "match_id": queue_msg[2],
181 |                     "free": False,
182 |                     "delete": False
183 |              }
184 |                 for i in queue_msg[1]:
185 |                     if i in PLAYER_WS:  # If the players didn't quits during matching
186 |                         await PLAYER_WS[i]["ws"].send(
187 |                             rapidjson.dumps({
188 |                                 "intent":
189 |                                 constants.NetworkCommands.match_start.value,
190 |                                 "match_id":
191 |                                 queue_msg[2]
192 |                             }))
193 |             for key in list(MATCH_PROCESS.keys()):
194 |                 value = MATCH_PROCESS[key]
195 |                 if value["pipe"].poll() and not value["free"]:
196 |                     pipe_msg = value["pipe"].recv()
197 |                     if pipe_msg[0] == constants.SubprocessCommands.match_next.value:
198 |                         value["free"] = True
199 |                         value["act"] = [0, 0, 0, 0]
200 |                         value["recv"] = [False, False, False, False]
201 |                         value["time"] = time.time()
202 |                         value["turn_id"] = pipe_msg[1]
203 |                         value["alive"] = pipe_msg[3]
204 |                         for x, y in enumerate(value["players"]):
205 |                             if y in list(PLAYER_WS.keys()):
206 |                                 try:
207 |                                     await PLAYER_WS[y]["ws"].send(pipe_msg[2][x])
208 |                                 except:
209 |                                     pass
210 |                             elif y not in PLAYER_WS:
211 |                                 value["act"][x] = 5
212 |                     if pipe_msg[0] is constants.SubprocessCommands.match_end.value:
213 |                         value["delete"] = True
214 |                         for x, y in enumerate(value["players"]):
215 |                             if y in PLAYER_WS:
216 |                                 await PLAYER_WS[y]["ws"].send(
217 |                                     rapidjson.dumps({
218 |                                         "intent":
219 |                                         constants.NetworkCommands.match_end.value,
220 |                                         "reward":
221 |                                         pipe_msg[1][x],
222 |                                         "agent":
223 |                                         10 + x
224 |                                     }))
225 |                 if value["free"]:
226 |                     if value["time"] + STOP_TIMEOUT < time.time(
227 |                     ) or value["recv"].count(True) == value["alive"]:
228 |                         value["pipe"].send(value["act"])
229 |                         value["free"] = False
230 |                 if value["delete"]:
231 |                     value["pipe"].send("END")
232 |                     del MATCH_PROCESS[key]
233 |         finally:
234 |             time.sleep(0.0001)  # Sleep for a while so other threads get the GIL
235 | 
236 | 
237 | def _run_server(port):
238 |     """Handles running the websocket thread"""
239 |     asyncio.set_event_loop(asyncio.new_event_loop())
240 |     asyncio.get_event_loop().run_until_complete(
241 |         websockets.serve(ws_handler, 'localhost', port))
242 |     asyncio.get_event_loop().run_forever()
243 | 
244 | 
245 | def thread(pipe_main, queue_subproc, port, max_players, mode, stop_timeout):
246 |     """Creates a network thread"""
247 |     # Note: Multiple threads are used so globals are used to share data b/w them
248 |     global MAX_PLAYERS, PIPE_MAIN, QUEUE_SUBPROC, MODE, STOP_TIMEOUT
249 |     MAX_PLAYERS = max_players
250 |     PIPE_MAIN = pipe_main
251 |     QUEUE_SUBPROC = queue_subproc
252 |     MODE = mode
253 |     STOP_TIMEOUT = stop_timeout
254 |     ws_thread = threading.Thread(target=_run_server, args=(port,))
255 |     ws_thread.start()
256 |     asyncio.set_event_loop(asyncio.new_event_loop())
257 |     asyncio.get_event_loop().run_until_complete(program_loop())
258 |     asyncio.get_event_loop().run_forever()
259 |     ws_thread.join()
260 | 


--------------------------------------------------------------------------------
/pommerman/resources/Agent0-No-Background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent0-No-Background.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent0-Team-No-Background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent0-Team-No-Background.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent0-Team.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent0-Team.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent0.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent1-No-Background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent1-No-Background.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent1-Team-No-Background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent1-Team-No-Background.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent1-Team.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent1-Team.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent1.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent2-No-Background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent2-No-Background.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent2-Team-No-Background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent2-Team-No-Background.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent2-Team.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent2-Team.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent2.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent3-No-Background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent3-No-Background.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent3-Team-No-Background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent3-Team-No-Background.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent3-Team.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent3-Team.png


--------------------------------------------------------------------------------
/pommerman/resources/Agent3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent3.png


--------------------------------------------------------------------------------
/pommerman/resources/AgentDummy-No-Background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/AgentDummy-No-Background.png


--------------------------------------------------------------------------------
/pommerman/resources/AgentDummy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/AgentDummy.png


--------------------------------------------------------------------------------
/pommerman/resources/Bomb-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-1.png


--------------------------------------------------------------------------------
/pommerman/resources/Bomb-10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-10.png


--------------------------------------------------------------------------------
/pommerman/resources/Bomb-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-2.png


--------------------------------------------------------------------------------
/pommerman/resources/Bomb-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-3.png


--------------------------------------------------------------------------------
/pommerman/resources/Bomb-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-4.png


--------------------------------------------------------------------------------
/pommerman/resources/Bomb-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-5.png


--------------------------------------------------------------------------------
/pommerman/resources/Bomb-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-6.png


--------------------------------------------------------------------------------
/pommerman/resources/Bomb-7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-7.png


--------------------------------------------------------------------------------
/pommerman/resources/Bomb-8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-8.png


--------------------------------------------------------------------------------
/pommerman/resources/Bomb-9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-9.png


--------------------------------------------------------------------------------
/pommerman/resources/Bomb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb.png


--------------------------------------------------------------------------------
/pommerman/resources/Cousine-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Cousine-Regular.ttf


--------------------------------------------------------------------------------
/pommerman/resources/ExtraBomb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/ExtraBomb.png


--------------------------------------------------------------------------------
/pommerman/resources/Flames.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Flames.png


--------------------------------------------------------------------------------
/pommerman/resources/Fog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Fog.png


--------------------------------------------------------------------------------
/pommerman/resources/IncrRange.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/IncrRange.png


--------------------------------------------------------------------------------
/pommerman/resources/Kick.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Kick.png


--------------------------------------------------------------------------------
/pommerman/resources/Passage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Passage.png


--------------------------------------------------------------------------------
/pommerman/resources/Rigid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Rigid.png


--------------------------------------------------------------------------------
/pommerman/resources/Skull.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Skull.png


--------------------------------------------------------------------------------
/pommerman/resources/Wood.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Wood.png


--------------------------------------------------------------------------------
/pommerman/resources/X-No-Background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/X-No-Background.png


--------------------------------------------------------------------------------
/pommerman/runner/__init__.py:
--------------------------------------------------------------------------------
1 | '''Module entry point for the base docker agent.'''
2 | from .docker_agent_runner import DockerAgentRunner
3 | 


--------------------------------------------------------------------------------
/pommerman/runner/docker_agent_runner.py:
--------------------------------------------------------------------------------
 1 | '''This is the basic docker agent runner'''
 2 | import abc
 3 | import logging
 4 | import json
 5 | from .. import constants
 6 | import numpy as np
 7 | from flask import Flask, jsonify, request
 8 | 
 9 | LOGGER = logging.getLogger(__name__)
10 | 
11 | 
12 | class DockerAgentRunner(metaclass=abc.ABCMeta):
13 |     """Abstract base class to implement Docker-based agent"""
14 | 
15 |     def __init__(self):
16 |         pass
17 | 
18 |     @abc.abstractmethod
19 |     def act(self, observation, action_space):
20 |         """Given an observation, returns the action the agent should"""
21 |         raise NotImplementedError()
22 | 
23 |     def run(self, host="0.0.0.0", port=10080):
24 |         """Runs the agent by creating a webserver that handles action requests."""
25 |         app = Flask(self.__class__.__name__)
26 | 
27 |         @app.route("/action", methods=["POST"])
28 |         def action(): #pylint: disable=W0612
29 |             '''handles an action over http'''
30 |             data = request.get_json()
31 |             observation = data.get("obs")
32 |             observation = json.loads(observation)
33 | 
34 |             observation['teammate'] = constants.Item(observation['teammate'])
35 |             for enemy_id in range(len(observation['enemies'])):
36 |                 observation['enemies'][enemy_id] = constants.Item(observation['enemies'][enemy_id])
37 |             observation['position'] = tuple(observation['position'])
38 |             observation['board'] = np.array(observation['board'], dtype=np.uint8)
39 |             observation['bomb_life'] = np.array(observation['bomb_life'], dtype=np.float64)
40 |             observation['bomb_blast_strength'] = np.array(observation['bomb_blast_strength'], dtype=np.float64)
41 |             observation['bomb_moving_direction'] = np.array(observation['bomb_moving_direction'], dtype=np.float64)
42 |             observation['flame_life'] = np.array(observation['flame_life'], dtype=np.float64)
43 | 
44 |             action_space = data.get("action_space")
45 |             action_space = json.loads(action_space)
46 |             action = self.act(observation, action_space)
47 |             return jsonify({"action": action})
48 | 
49 |         @app.route("/init_agent", methods=["POST"])
50 |         def init_agent(): #pylint: disable=W0612
51 |             '''initiates agent over http'''
52 |             data = request.get_json()
53 |             id = data.get("id")
54 |             id = json.loads(id)
55 |             game_type = data.get("game_type")
56 |             game_type = constants.GameType(json.loads(game_type))
57 |             self.init_agent(id, game_type)
58 |             return jsonify(success=True)
59 | 
60 |         @app.route("/shutdown", methods=["POST"])
61 |         def shutdown(): #pylint: disable=W0612
62 |             '''Requests destruction of any created objects'''
63 |             self.shutdown()
64 |             return jsonify(success=True)
65 | 
66 |         @app.route("/episode_end", methods=["POST"])
67 |         def episode_end(): #pylint: disable=W0612
68 |             '''Info about end of a game'''
69 |             data = request.get_json()
70 |             reward = data.get("reward")
71 |             reward = json.loads(reward)
72 |             self.episode_end(reward)
73 |             return jsonify(success=True)
74 | 
75 |         @app.route("/ping", methods=["GET"])
76 |         def ping(): #pylint: disable=W0612
77 |             '''Basic agent health check'''
78 |             return jsonify(success=True)
79 | 
80 |         LOGGER.info("Starting agent server on port %d", port)
81 |         app.run(host=host, port=port)
82 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | docker~=3.1
 2 | gym~=0.10.5
 3 | scipy~=1.0
 4 | Pillow~=8.3
 5 | ruamel.yaml~=0.15
 6 | Flask~=0.12
 7 | requests~=2.18
 8 | jsonmerge~=1.5.1
 9 | astroid>=2
10 | isort~=4.3.4
11 | pylint>=2
12 | websockets~=6.0
13 | websocket-client~=0.53.0
14 | python-cli-ui~=0.7.1
15 | python-rapidjson~=0.6.3
16 | Click==7.0
17 | 


--------------------------------------------------------------------------------
/requirements_extra.txt:
--------------------------------------------------------------------------------
1 | tensorforce~=0.4.0
2 | tensorflow~=1.6
3 | jupyter~=1.0
4 | recommonmark~=0.3.0


--------------------------------------------------------------------------------
/scripts/build_sample_docker_agent:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -ex
 4 | 
 5 | pushd $(dirname $0) > /dev/null
 6 |   pushd ..
 7 |     docker build . -f examples/docker-agent/Dockerfile
 8 |   popd > /dev/null
 9 | popd > /dev/null
10 | 
11 | 


--------------------------------------------------------------------------------
/scripts/run_sample_docker_agent:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e
 4 | 
 5 | if [ -z "$1" ]; then
 6 |   echo "Usage: run_sample_docker_agent <container-id>"
 7 |   exit 1
 8 | fi
 9 | 
10 | container_id="$(docker run -d -p 10080:10080 "$1")"
11 | 
12 | echo "Started running container $container_id"
13 | exit 0


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from setuptools import setup, find_packages
 3 | 
 4 | CURRENT_PYTHON = sys.version_info[:2]
 5 | MIN_PYTHON = (3, 6)
 6 | 
 7 | if CURRENT_PYTHON < MIN_PYTHON:
 8 |     sys.stderr.write("""
 9 |         ============================
10 |         Unsupported Python Version
11 |         ============================
12 |         
13 |         Python {}.{} is unsupported. Please use a version newer than Python {}.{}.
14 |     """.format(*CURRENT_PYTHON, *MIN_PYTHON))
15 |     sys.exit(1)
16 | 
17 | with open('requirements.txt', 'r') as f:
18 |     install_requires = f.readlines()
19 | 
20 | with open('requirements_extra.txt', 'r') as f:
21 |     extras_require = f.readlines()
22 | 
23 | with open('VERSION') as f:
24 |     VERSION = f.read().strip()
25 | 
26 | files = ["resources/*"]
27 | 
28 | setup(name='pommerman',
29 |       version=VERSION,
30 |       description='PlayGround: AI Research into Multi-Agent Learning',
31 |       url='https://www.pommerman.com',
32 |       author='Pommerman',
33 |       author_email='support@pommerman.com',
34 |       license='Apache 2.0',
35 |       classifiers=[
36 |           'Programming Language :: Python :: 3.6',
37 |       ],
38 |       packages=find_packages(),
39 |       package_data = {'pommerman' : files },
40 |       install_requires=install_requires,
41 |       extras_require={
42 |         'extras': extras_require # @TODO this might need refinement
43 |       },
44 |       entry_points={
45 |         'console_scripts': [
46 |             'pom_battle=pommerman.cli.run_battle:main',
47 |             'pom_tf_battle=pommerman.cli.train_with_tensorforce:main',
48 |             'ion_client=pommerman.network.client:init',
49 |             'ion_server=pommerman.network.server:init'
50 |         ],
51 |       },
52 |       zip_safe=False)
53 | 


--------------------------------------------------------------------------------