├── .gitignore ├── LICENSE ├── README.md ├── novgrid ├── __init__.py ├── config.py ├── env_configs │ ├── __init__.py │ ├── generator.py │ └── json │ │ ├── door_key.json │ │ ├── door_key_change.json │ │ ├── increasing_num_crossings.json │ │ ├── sample.json │ │ └── simple_to_lava_to_simple_crossing.json ├── envs │ ├── __init__.py │ ├── colored_door_key.py │ └── novgrid_objects.py ├── example.py ├── novelty_env.py └── register_envs.py ├── novgrid_old ├── __init__.py ├── baselines │ ├── __init__.py │ ├── cnn_sample.py │ ├── models │ │ └── ppo_minigrid_example_model.zip │ ├── ppo_minigrid.py │ └── render_env.py ├── env_generator.py ├── envs │ ├── __init__.py │ ├── lavagapdoorkey.py │ └── multidoormultikey.py ├── novelty_generation │ ├── __init__.py │ ├── novelty_objs.py │ └── novelty_wrappers.py └── utils │ ├── __init__.py │ ├── baseline_utils.py │ ├── default.ini │ ├── novgrid_utils.py │ └── parser.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | logs 2 | novgrid/baselines/logs/* 3 | novgrid/baselines/logs 4 | *.pyc 5 | *__pycache__ 6 | *egg-info 7 | trained_models 8 | 9 | # PyPI 10 | build/* 11 | dist/* 12 | .idea/ 13 | 14 | # wandb 15 | *wandb* 16 | *videos* 17 | *runs* 18 | 19 | # Vim 20 | *.swp 21 | 22 | # Byte-compiled / optimized / DLL files 23 | __pycache__/ 24 | *.py[cod] 25 | *$py.class 26 | 27 | # C extensions 28 | *.so 29 | 30 | # Distribution / packaging 31 | .Python 32 | build/ 33 | develop-eggs/ 34 | dist/ 35 | downloads/ 36 | eggs/ 37 | .eggs/ 38 | lib/ 39 | lib64/ 40 | parts/ 41 | sdist/ 42 | var/ 43 | wheels/ 44 | pip-wheel-metadata/ 45 | share/python-wheels/ 46 | *.egg-info/ 47 | .installed.cfg 48 | *.egg 49 | MANIFEST 50 | 51 | # PyInstaller 52 | # Usually these files are written by a python script from a template 53 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
54 | *.manifest 55 | *.spec 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .nox/ 65 | .coverage 66 | .coverage.* 67 | .cache 68 | nosetests.xml 69 | coverage.xml 70 | *.cover 71 | *.py,cover 72 | .hypothesis/ 73 | .pytest_cache/ 74 | 75 | # Translations 76 | *.mo 77 | *.pot 78 | 79 | # Django stuff: 80 | *.log 81 | local_settings.py 82 | db.sqlite3 83 | db.sqlite3-journal 84 | 85 | # Flask stuff: 86 | instance/ 87 | .webassets-cache 88 | 89 | # Scrapy stuff: 90 | .scrapy 91 | 92 | # Sphinx documentation 93 | docs/_build/ 94 | 95 | # PyBuilder 96 | target/ 97 | 98 | # Jupyter Notebook 99 | .ipynb_checkpoints 100 | 101 | # IPython 102 | profile_default/ 103 | ipython_config.py 104 | 105 | # pyenv 106 | .python-version 107 | 108 | # pipenv 109 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 110 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 111 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 112 | # install all needed dependencies. 113 | #Pipfile.lock 114 | 115 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
Novelty MiniGrid (NovGrid) is an extension of the [MiniGrid](https://github.com/Farama-Foundation/Minigrid) environment
'env_configs': 'door_key_change',
https://www.pygame.org/contribute.html 64 | step_num: 0; env_idx: [0]; rewards: [0]; dones: [False] 65 | step_num: 1; env_idx: [0]; rewards: [0]; dones: [False] 66 | step_num: 2; env_idx: [0]; rewards: [0]; dones: [False] 67 | step_num: 3; env_idx: [0]; rewards: [0]; dones: [False] 68 | step_num: 4; env_idx: [0]; rewards: [0]; dones: [False] 69 | step_num: 5; env_idx: [0]; rewards: [0]; dones: [False] 70 | step_num: 6; env_idx: [0]; rewards: [0]; dones: [False] 71 | step_num: 7; env_idx: [0]; rewards: [0]; dones: [False] 72 | step_num: 8; env_idx: [0]; rewards: [0]; dones: [False] 73 | step_num: 9; env_idx: [0]; rewards: [0]; dones: [False] 74 | step_num: 10; env_idx: [1]; rewards: [0]; dones: [ True] 75 | step_num: 11; env_idx: [1]; rewards: [0]; dones: [False] 76 | step_num: 12; env_idx: [1]; rewards: [0]; dones: [False] 77 | step_num: 13; env_idx: [1]; rewards: [0]; dones: [False] 78 | step_num: 14; env_idx: [1]; rewards: [0]; dones: [False] 79 | step_num: 15; env_idx: [1]; rewards: [0]; dones: [False] 80 | step_num: 16; env_idx: [1]; rewards: [0]; dones: [False] 81 | step_num: 17; env_idx: [1]; rewards: [0]; dones: [False] 82 | step_num: 18; env_idx: [1]; rewards: [0]; dones: [False] 83 | step_num: 19; env_idx: [1]; rewards: [0]; dones: [False] 84 | step_num: 20; env_idx: [1]; rewards: [0]; dones: [False] 85 | step_num: 21; env_idx: [2]; rewards: [0]; dones: [ True] 86 | step_num: 22; env_idx: [2]; rewards: [0]; dones: [False] 87 | step_num: 23; env_idx: [2]; rewards: [0]; dones: [False] 88 | step_num: 24; env_idx: [2]; rewards: [0]; dones: [False] 89 | step_num: 25; env_idx: [2]; rewards: [0]; dones: [False] 90 | step_num: 26; env_idx: [2]; rewards: [0]; dones: [False] 91 | step_num: 27; env_idx: [2]; rewards: [0]; dones: [False] 92 | step_num: 28; env_idx: [2]; rewards: [0]; dones: [False] 93 | step_num: 29; env_idx: [2]; rewards: [0]; dones: [False] 94 | ``` 95 | 96 | ## Novelties 97 | The following is a list and descriptions of the available novelty 
In MiniGrid the Goal object is usually at a fixed location.
110 | 111 | **ForwardMovementSpeed**: This novelty modifies the number of steps an agent takes each time the forward command is issued. In MiniGrid agents only move one gridsquare per time step. As a result, if the agent gets faster after novelty, the original policy may have a harder time controlling the agent, and will need to learn how to embrace this change that could make it reach the goal in fewer steps. 112 | 113 | **ActionRadius**: This novelty is an example of a change to the relational preconditions of an action by changing the radius around the agent where an action works. In Mini- Grid this is usually assumed to be only a distance of one or zero, depending on the object. If an agent can pick up objects after novelty without being right next to them, it will have to realize this if it is to reach the optimum solu- tion. 114 | 115 | **ColorRestriction**: This novelty restricts the objects one can interact with by color. In MiniGrid it is usually as- sumed that all objects can be interacted with. If an agent is trained with no blue interactions before novelty and then isn’t allowed to interact with yellow objects after novelty, the agent will have to learn to pay attention to the color of objects. 116 | 117 | **Burdening**: This novelty changes the effect of actions based on whether the agent has any items in the inven- tory. In MiniGrid it is usually assumed that the inventory has no effect on actions. An agent experiencing this nov- elty, for example, might move twice as fast as usual when their inventory is empty, but half as fast as usual when in possession of the item, which it will have to compensate for strategically. 
118 | -------------------------------------------------------------------------------- /novgrid/__init__.py: -------------------------------------------------------------------------------- 1 | from novgrid.novelty_env import NoveltyEnv 2 | 3 | from novgrid.register_envs import register_novgrid_envs 4 | 5 | register_novgrid_envs() 6 | -------------------------------------------------------------------------------- /novgrid/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | ENV_CONFIG_FILE = "sample" 4 | TOTAL_TIME_STEPS = None 5 | NOVELTY_STEP = 10 6 | N_ENVS = 1 7 | RENDER_DISPLAY = False 8 | STEP_DELAY = 0.0 9 | 10 | 11 | def make_parser() -> argparse.ArgumentParser: 12 | """ 13 | Creates a default parser that contains everything that a novgrid environment would need. 14 | 15 | Returns: 16 | argparse.ArgumentParser: The parser 17 | """ 18 | parser = argparse.ArgumentParser() 19 | 20 | parser.add_argument( 21 | "--env-configs-file", 22 | "-ec", 23 | type=str, 24 | default=ENV_CONFIG_FILE, 25 | help="Use the path to a json file containing the env configs here.", 26 | ) 27 | parser.add_argument( 28 | "--total-time-steps", 29 | "-t", 30 | type=int, 31 | default=TOTAL_TIME_STEPS, 32 | help="The total number of time steps to run.", 33 | ) 34 | parser.add_argument( 35 | "--novelty-step", 36 | "-n", 37 | type=int, 38 | default=NOVELTY_STEP, 39 | help="The total number of time steps to run in an environment before injecting the next novelty.", 40 | ) 41 | parser.add_argument( 42 | "--n-envs", 43 | "-e", 44 | type=int, 45 | default=N_ENVS, 46 | help="The number of envs to use when running the vectorized env.", 47 | ) 48 | parser.add_argument( 49 | "--render-display", 50 | "-rd", 51 | type=lambda s: s.lower() in {"yes", "true", "t", "y"}, 52 | default=RENDER_DISPLAY, 53 | help="Whether or not to render the display of the environment as the agent is stepping.", 54 | ) 55 | parser.add_argument( 56 | 
def get_env_configs(name: str) -> List[Dict[str, Any]]:
    """
    Load a list of environment configs from a packaged JSON file.

    Args:
        name (str): The config name, with or without the ".json" suffix,
            resolved relative to this package's "json" directory.

    Returns:
        List[Dict[str, Any]]: The parsed environment configurations.

    Raises:
        FileNotFoundError: If no config file with this name exists.
    """
    # Use endswith rather than a substring test so that names that merely
    # contain ".json" somewhere in the middle (e.g. "my.json.backup") still
    # get the proper file extension appended.
    fname = name if name.endswith(".json") else f"{name}.json"
    full_fname = os.path.join(os.path.dirname(__file__), "json", fname)
    with open(full_fname) as f:
        return json.load(f)
class IntRange(Change):
    """Change that walks an integer interval across the task sequence."""

    def __init__(self, start: int, end: int, inclusive: bool = False) -> None:
        """
        Set up the integer interval to interpolate over.

        Args:
            start (int): Start of the range.
            end (int): End of the range.
            inclusive (bool): Whether to include the end value in the range.
        """
        super().__init__()
        self.start = start
        self.end = end
        self.inclusive = inclusive

    def generate_value(self, i: int, num_tasks: int) -> int:
        """
        Interpolate task index ``i`` onto the interval using integer division.

        Args:
            i (int): Current iteration.
            num_tasks (int): Total number of tasks.

        Returns:
            int: Generated integer value.
        """
        # Widen the span by one when the end point itself should be reachable.
        span = self.end + int(self.inclusive) - self.start
        return self.start + (i * span) // num_tasks
class Toggle(Change):
    """Change that alternates between two values from task to task."""

    def __init__(self, val1: Any = False, val2: Any = True) -> None:
        """
        Store the pair of values to alternate between.

        Args:
            val1 (Any): First value.
            val2 (Any): Second value.
        """
        super().__init__()
        self.val1 = val1
        self.val2 = val2

    def generate_value(self, i: int, num_tasks: int) -> Any:
        """
        Pick one of the two values based on the parity of the iteration index.

        Args:
            i (int): Current iteration.
            num_tasks (int): Total number of tasks.

        Returns:
            Any: ``val1`` on even ``i``, ``val2`` on odd ``i``.
        """
        if i % 2:
            return self.val2
        return self.val1
class EnvConfigGenerator:
    """
    Class for generating environment configurations based on specified changes.
    """

    def __init__(
        self, env_id: str, num_tasks: int, changes: Dict[str, "Change"]
    ) -> None:
        """
        Initializes the EnvConfigGenerator with the base environment ID, number of tasks, and changes.

        Args:
            env_id (str): Base environment ID.
            num_tasks (int): Number of tasks to generate.
            changes (Dict[str, Change]): Dictionary of changes to be applied.
        """
        self.base_env_id = env_id
        self.num_tasks = num_tasks
        self.changes = changes

    def generate_env_configs(self) -> List[Dict[str, Any]]:
        """
        Generates environment configurations based on the specified changes.

        Each config carries the base env_id plus one generated value per
        registered change for that task index.

        Returns:
            List[Dict[str, Any]]: List of environment configurations.
        """
        return [
            {
                "env_id": self.base_env_id,
                **{
                    k: v.generate_value(i, self.num_tasks)
                    for k, v in self.changes.items()
                },
            }
            for i in range(self.num_tasks)
        ]

    def save_env_configs(self, json_file_name: str) -> List[Dict[str, Any]]:
        """
        Generates and saves environment configurations to a JSON file.

        Args:
            json_file_name (str): Name of the JSON file.

        Returns:
            List[Dict[str, Any]]: List of environment configurations.
        """
        env_configs = self.generate_env_configs()
        with open(json_file_name, "w") as f:
            json.dump(env_configs, f, indent=2)
        return env_configs

    def global_save_env_configs(
        self, name: str, override_existing_file: bool = False
    ) -> List[Dict[str, Any]]:
        """
        Generates and globally saves environment configurations.

        Args:
            name (str): Name of the environment configuration.
            override_existing_file (bool): Whether to override an existing configuration file.

        Returns:
            List[Dict[str, Any]]: List of environment configurations.

        Raises:
            ValueError: If a global config with this name already exists and
                override_existing_file is False.
        """
        # Use endswith rather than a substring test so names merely containing
        # ".json" are not mistaken for full file names.
        fname = name if name.endswith(".json") else f"{name}.json"
        full_fname = os.path.join(os.path.dirname(__file__), "json", fname)
        if os.path.exists(full_fname) and not override_existing_file:
            raise ValueError(
                f"The name {name} already has a global file for its env config. To override this file use the override_existing_file flag."
            )
        # Bug fix: the configs were previously saved but never returned, even
        # though the docstring documents a return value.
        return self.save_env_configs(full_fname)
290 | """ 291 | expected_result = [ 292 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 1}, 293 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 2}, 294 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 3}, 295 | ] 296 | 297 | result = EnvConfigGenerator( 298 | env_id="MiniGrid-SimpleCrossingS9N0-v0", 299 | num_tasks=3, 300 | changes={"num_crossings": IntRange(1, 4)}, 301 | ).generate_env_configs() 302 | 303 | assert expected_result == result 304 | 305 | 306 | def test_generator_list_change(): 307 | """ 308 | Test case for EnvConfigGenerator with a list change. 309 | """ 310 | expected_result = [ 311 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 1}, 312 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 2}, 313 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 3}, 314 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 2}, 315 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 1}, 316 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 2}, 317 | ] 318 | 319 | result = EnvConfigGenerator( 320 | env_id="MiniGrid-SimpleCrossingS9N0-v0", 321 | num_tasks=6, 322 | changes={"num_crossings": ListChange([1, 2, 3], use_snake_boundary=True)}, 323 | ).generate_env_configs() 324 | 325 | assert expected_result == result 326 | 327 | 328 | def test_generator_float_range(): 329 | """ 330 | Test case for EnvConfigGenerator with a float range change. 
331 | """ 332 | expected_result = [ 333 | {"env_id": "CartPole", "pole_weight": 5.0}, 334 | {"env_id": "CartPole", "pole_weight": 6.5}, 335 | {"env_id": "CartPole", "pole_weight": 8.0}, 336 | {"env_id": "CartPole", "pole_weight": 9.5}, 337 | ] 338 | 339 | result = EnvConfigGenerator( 340 | env_id="CartPole", 341 | num_tasks=4, 342 | changes={"pole_weight": FloatRange(5.0, 9.5, inclusive=True)}, 343 | ).generate_env_configs() 344 | 345 | assert expected_result == result 346 | 347 | 348 | def test_generator_multi_change(): 349 | """ 350 | Test case for EnvConfigGenerator with multiple changes. 351 | """ 352 | expected_result = [ 353 | { 354 | "env_id": "MiniGrid-SimpleCrossingS9N0-v0", 355 | "num_crossings": 1, 356 | "test_constant": 5, 357 | }, 358 | { 359 | "env_id": "MiniGrid-SimpleCrossingS9N0-v0", 360 | "num_crossings": 2, 361 | "test_constant": 5, 362 | }, 363 | { 364 | "env_id": "MiniGrid-SimpleCrossingS9N0-v0", 365 | "num_crossings": 3, 366 | "test_constant": 5, 367 | }, 368 | ] 369 | 370 | result = EnvConfigGenerator( 371 | env_id="MiniGrid-SimpleCrossingS9N0-v0", 372 | num_tasks=3, 373 | changes={"num_crossings": IntRange(1, 4), "test_constant": Constant(5)}, 374 | ).generate_env_configs() 375 | 376 | assert expected_result == result 377 | -------------------------------------------------------------------------------- /novgrid/env_configs/json/door_key.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "env_id": "NovGrid-ColoredDoorKeyEnv" 4 | } 5 | ] 6 | -------------------------------------------------------------------------------- /novgrid/env_configs/json/door_key_change.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "env_id": "NovGrid-ColoredDoorKeyEnv", 4 | "door_color": "red", 5 | "correct_key_color": "red", 6 | "key_colors": ["red", "blue"] 7 | }, 8 | { 9 | "env_id": "NovGrid-ColoredDoorKeyEnv", 10 | "door_color": "red", 11 | 
"correct_key_color": "blue", 12 | "key_colors": ["red", "blue"] 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /novgrid/env_configs/json/increasing_num_crossings.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 1 }, 3 | { "env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 2 }, 4 | { "env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 3 } 5 | ] 6 | -------------------------------------------------------------------------------- /novgrid/env_configs/json/sample.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "env_id": "MiniGrid-Empty-16x16-v0", 4 | "size": 10 5 | }, 6 | { 7 | "env_id": "MiniGrid-Empty-16x16-v0", 8 | "size": 8 9 | }, 10 | { 11 | "env_id": "MiniGrid-Empty-16x16-v0" 12 | } 13 | ] 14 | -------------------------------------------------------------------------------- /novgrid/env_configs/json/simple_to_lava_to_simple_crossing.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "env_id": "MiniGrid-SimpleCrossingS9N1-v0", 4 | "obstacle_type": "gridobj:Wall" 5 | }, 6 | { 7 | "env_id": "MiniGrid-SimpleCrossingS9N1-v0", 8 | "obstacle_type": "gridobj:Lava" 9 | }, 10 | { 11 | "env_id": "MiniGrid-SimpleCrossingS9N1-v0", 12 | "obstacle_type": "gridobj:Wall" 13 | } 14 | ] -------------------------------------------------------------------------------- /novgrid/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from novgrid.envs.colored_door_key import ColoredDoorKeyEnv 2 | -------------------------------------------------------------------------------- /novgrid/envs/colored_door_key.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Any, Dict, List, SupportsFloat 2 
| 3 | from minigrid.core.grid import Grid 4 | from minigrid.core.world_object import Door, Goal, Key 5 | from minigrid.core.mission import MissionSpace 6 | from minigrid.minigrid_env import MiniGridEnv 7 | 8 | from novgrid.envs.novgrid_objects import ColorDoor 9 | 10 | 11 | class ColoredDoorKeyEnv(MiniGridEnv): 12 | 13 | def __init__( 14 | self, 15 | door_color: str = "yellow", 16 | key_colors: Optional[List[str]] = None, 17 | correct_key_color: str = "yellow", 18 | size: int = 8, 19 | max_steps: Optional[int] = None, 20 | **kwargs: Dict[str, Any] 21 | ): 22 | self.door_color = door_color 23 | self.key_colors = key_colors if key_colors is not None else [correct_key_color] 24 | self.correct_key_color = correct_key_color 25 | if max_steps is None: 26 | max_steps = 10 * size**2 27 | mission_space = MissionSpace(mission_func=self._gen_mission) 28 | super().__init__( 29 | mission_space=mission_space, grid_size=size, max_steps=max_steps, **kwargs 30 | ) 31 | 32 | @staticmethod 33 | def _gen_mission(): 34 | return "use the correct key to open the door and get to the goal" 35 | 36 | def step(self, action): 37 | return super().step(action) 38 | 39 | def _gen_grid(self, width: int, height: int): 40 | # Create an empty grid 41 | self.grid = Grid(width=width, height=height) 42 | 43 | # Generate the surrounding walls 44 | self.grid.wall_rect(0, 0, width, height) 45 | 46 | # Place a goal in the bottom right corner 47 | self.put_obj(Goal(), width - 2, height - 2) 48 | 49 | # Create a vertical splitting wall 50 | splitIdx = self._rand_int(2, width - 2) 51 | self.grid.vert_wall(splitIdx, 0) 52 | 53 | # Place the agent at a random position and orientation on the left side 54 | self.place_agent(size=(splitIdx, height)) 55 | 56 | # Place a door in the wall 57 | doorIdx = self._rand_int(1, width - 2) 58 | self.put_obj( 59 | ColorDoor( 60 | self.door_color, is_locked=True, key_color=self.correct_key_color 61 | ), 62 | splitIdx, 63 | doorIdx, 64 | ) 65 | 66 | # Place a yellow key on the 
left side 67 | for color in self.key_colors: 68 | self.place_obj(obj=Key(color=color), top=(0, 0), size=(splitIdx, height)) 69 | 70 | self.mission = self._gen_mission() 71 | -------------------------------------------------------------------------------- /novgrid/envs/novgrid_objects.py: -------------------------------------------------------------------------------- 1 | from minigrid.core.world_object import * 2 | 3 | class ColorDoor(Door): 4 | """ 5 | A Door instance where the key color can be specified and doesn't have to match the door 6 | """ 7 | def __init__(self, color, is_open=False, is_locked=False, key_color=None): 8 | super().__init__(color, is_open, is_locked) 9 | self.is_open = is_open 10 | self.is_locked = is_locked 11 | if key_color: 12 | self.key_color = key_color 13 | else: 14 | self.key_color = color 15 | 16 | def toggle(self, env, pos): 17 | # If the player has the right key to open the door 18 | if self.is_locked: 19 | if isinstance(env.carrying, Key) and env.carrying.color == self.key_color: 20 | self.is_locked = False 21 | self.is_open = True 22 | return True 23 | return False 24 | 25 | self.is_open = not self.is_open 26 | return True -------------------------------------------------------------------------------- /novgrid/example.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | from novgrid import NoveltyEnv 5 | from novgrid.config import make_parser 6 | 7 | 8 | def run_example( 9 | args: argparse.Namespace, 10 | ) -> None: 11 | """Run an example run with random actions to test a given configuration 12 | 13 | Args: 14 | args (argparse.Namespace): The args from the default parser 15 | """ 16 | env = NoveltyEnv( 17 | env_configs=args.env_configs_file, 18 | novelty_step=args.novelty_step, 19 | n_envs=args.n_envs, 20 | render_mode="human" if args.render_display else None, 21 | ) 22 | 23 | env.reset() 24 | 25 | if args.total_time_steps is None: 26 | total_time_steps = 
(env.num_transfers + 1) * args.novelty_step 27 | else: 28 | total_time_steps = args.total_time_steps 29 | 30 | for step_num in range(0, total_time_steps, args.n_envs): 31 | observations, rewards, dones, infos = env.step( 32 | [env.action_space.sample() for _ in range(args.n_envs)] 33 | ) 34 | if args.render_display: 35 | env.render("human") 36 | print( 37 | f"step_num: {step_num}; env_idx: {env.get_attr('env_idx')}; rewards: {rewards}; dones: {dones}" 38 | ) 39 | 40 | if args.step_delay > 0: 41 | time.sleep(args.step_delay) 42 | 43 | 44 | if __name__ == "__main__": 45 | parser = make_parser() 46 | args = parser.parse_args() 47 | 48 | run_example(args=args) 49 | -------------------------------------------------------------------------------- /novgrid/novelty_env.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Optional, SupportsFloat, Tuple, Dict, Union 2 | 3 | import os 4 | 5 | import gymnasium as gym 6 | from gymnasium.envs.registration import EnvSpec 7 | import json 8 | import numpy as np 9 | import inspect 10 | 11 | from stable_baselines3.common.monitor import Monitor 12 | from stable_baselines3.common.vec_env import SubprocVecEnv 13 | from stable_baselines3.common.vec_env.base_vec_env import VecEnvStepReturn 14 | 15 | from novgrid.env_configs import get_env_configs 16 | import novgrid.envs.novgrid_objects as novgrid_objects 17 | 18 | 19 | class ListEnv(gym.Env): 20 | """ 21 | A vectorized environment that chains multiple environments together. 22 | 23 | Attributes: 24 | env_lst (List[gymnasium.Env]): List of environments to chain. 25 | env_idx (int): Index of the current environment. 26 | """ 27 | 28 | def __init__(self, env_lst: List[gym.Env]) -> None: 29 | """ 30 | Initializes the ListEnv with a list of environments. 31 | 32 | Args: 33 | env_lst (List[gymnasium.Env]): List of environments to chain. 
34 | """ 35 | self.env_lst = env_lst 36 | self.env_idx = 0 37 | 38 | def incr_env_idx(self) -> bool: 39 | """ 40 | Increments the environment index, closing the current environment and resetting to the next one. 41 | 42 | Returns: 43 | bool: True if the environment index was successfully incremented, False otherwise. 44 | """ 45 | if self.env_idx >= len(self.env_lst) - 1: 46 | return False 47 | self.cur_env.close() 48 | self.env_idx += 1 49 | self.cur_env.reset() 50 | return True 51 | 52 | def step( 53 | self, action: Any 54 | ) -> Tuple[Any, SupportsFloat, bool, bool, Dict[str, Any]]: 55 | """ 56 | Takes a step in the current environment. 57 | 58 | Args: 59 | action (Any): Action to take. 60 | 61 | Returns: 62 | Tuple[Any, SupportsFloat, bool, bool, Dict[str, Any]]: Step information. 63 | """ 64 | return self.cur_env.step(action=action) 65 | 66 | def reset( 67 | self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None 68 | ) -> Tuple[Any, Dict[str, Any]]: 69 | """ 70 | Resets the current environment. 71 | 72 | Args: 73 | seed (Optional[int]): Seed for environment reset. 74 | options (Optional[Dict[str, Any]]): Additional options for reset. 75 | 76 | Returns: 77 | Tuple[Any, Dict[str, Any]]: Reset information. 78 | """ 79 | return self.cur_env.reset(seed=seed, options=options) 80 | 81 | def render(self) -> Union[gym.core.RenderFrame, List[gym.core.RenderFrame], None]: 82 | """ 83 | Renders the current environment. 84 | 85 | Returns: 86 | Union[gymnasium.core.RenderFrame, List[gymnasium.core.RenderFrame], None]: Rendered frame(s). 87 | """ 88 | return self.cur_env.render() 89 | 90 | def close(self) -> None: 91 | """Closes all environments in the list.""" 92 | for env in self.env_lst: 93 | env.close() 94 | 95 | @property 96 | def cur_env(self) -> gym.Env: 97 | """ 98 | Gets the current environment. 99 | 100 | Returns: 101 | gymnasium.Env: Current environment. 
102 | """ 103 | return self.env_lst[self.env_idx] 104 | 105 | @property 106 | def unwrapped(self) -> gym.Env: 107 | """ 108 | Gets the unwrapped version of the current environment. 109 | 110 | Returns: 111 | gymnasium.Env: Unwrapped current environment. 112 | """ 113 | return self.cur_env 114 | 115 | @property 116 | def action_space(self) -> gym.Space: 117 | """ 118 | Gets the action space of the current environment. 119 | 120 | Returns: 121 | gymnasium.Space: Action space. 122 | """ 123 | return self.cur_env.action_space 124 | 125 | @property 126 | def observation_space(self) -> gym.Space: 127 | """ 128 | Gets the observation space of the current environment. 129 | 130 | Returns: 131 | gymnasium.Space: Observation space. 132 | """ 133 | return self.cur_env.observation_space 134 | 135 | @property 136 | def reward_range(self) -> Tuple[SupportsFloat, SupportsFloat]: 137 | """ 138 | Gets the reward range of the current environment. 139 | 140 | Returns: 141 | Tuple[SupportsFloat, SupportsFloat]: Reward range. 142 | """ 143 | return self.cur_env.reward_range 144 | 145 | @property 146 | def spec(self) -> EnvSpec: 147 | """ 148 | Gets the spec of the current environment. 149 | 150 | Returns: 151 | gymnasium.EnvSpec: Environment specification. 152 | """ 153 | return self.cur_env.spec 154 | 155 | @property 156 | def np_random(self) -> np.random.RandomState: 157 | """ 158 | Gets the random number generator of the current environment. 159 | 160 | Returns: 161 | np.random.RandomState: Random number generator. 162 | """ 163 | return self.cur_env.np_random 164 | 165 | @property 166 | def render_mode(self) -> Optional[str]: 167 | """ 168 | Gets the render mode of the current environment. 169 | 170 | Returns: 171 | Optional[str]: Render mode. 172 | """ 173 | return self.cur_env.render_mode 174 | 175 | 176 | class NoveltyEnv(SubprocVecEnv): 177 | """ 178 | A vectorized environment with novelty injection based on specified intervals. 
179 | 180 | Attributes: 181 | novelty_step (int): Number of time steps between novelty injections. 182 | n_envs (int): Number of environments to run in parallel. 183 | print_novelty_box (bool): Whether to print a novelty injection box. 184 | num_transfers (int): Number of transfers between environments. 185 | total_time_steps (int): Total time steps taken. 186 | last_incr (int): Time step of the last environment index increment. 187 | start_index (int): Starting index for environment creation. 188 | monitor_dir (Optional[str]): Directory for monitoring results. 189 | """ 190 | 191 | def __init__( 192 | self, 193 | env_configs: Union[str, List[Dict[str, Any]]], 194 | novelty_step: int, 195 | wrappers: List[gym.Wrapper] = [], 196 | wrapper_kwargs_lst: List[Dict[str, Any]] = [], 197 | n_envs: int = 1, 198 | seed: Optional[int] = None, 199 | start_index: int = 0, 200 | monitor_dir: Optional[str] = None, 201 | monitor_kwargs: Optional[str] = None, 202 | start_method: Optional[str] = None, 203 | print_novelty_box: bool = False, 204 | render_mode: Optional[str] = None, 205 | ): 206 | """ 207 | Initializes the NoveltyEnv with the provided configurations. 208 | 209 | Args: 210 | env_configs (Union[str, List[Dict[str, Any]]]): Configuration for environments. 211 | novelty_step (int): Number of time steps between novelty injections. 212 | wrappers (List[gymnasium.Wrapper]): List of wrappers to apply to each environment. 213 | wrapper_kwargs_lst (List[Dict[str, Any]]): List of wrapper kwargs for each wrapper. 214 | n_envs (int): Number of environments to run in parallel. 215 | seed (Optional[int]): Random seed. 216 | start_index (int): Starting index for environment creation. 217 | monitor_dir (Optional[str]): Directory for monitoring results. 218 | monitor_kwargs (Optional[str]): Additional kwargs for monitoring. 219 | start_method (Optional[str]): Start method for parallel environments. 220 | print_novelty_box (bool): Whether to print a novelty injection box. 
221 | render_mode (Optional[str]): Render mode for environments. 222 | """ 223 | if type(env_configs) == str: 224 | if os.path.exists(env_configs): 225 | with open(env_configs, "r") as f: 226 | env_configs = json.load(f) 227 | else: 228 | env_configs = get_env_configs(env_configs) 229 | 230 | world_objects = { 231 | k.lower(): v 232 | for k, v in inspect.getmembers( 233 | novgrid_objects, 234 | lambda obj: inspect.isclass(obj) 235 | and issubclass(obj, novgrid_objects.WorldObj), 236 | ) 237 | } 238 | 239 | for cfg in env_configs: 240 | for k, v in cfg.items(): 241 | if ( 242 | type(v) == str 243 | and v.startswith("gridobj:") 244 | and v.split(":")[-1].lower() in world_objects 245 | ): 246 | cfg[k] = world_objects[v.split(":")[-1].lower()] 247 | 248 | self.novelty_step = novelty_step 249 | self.n_envs = n_envs 250 | self.n_tasks = len(env_configs) 251 | self.print_novelty_box = print_novelty_box 252 | self.num_transfers = len(env_configs) - 1 253 | 254 | self.total_time_steps = 0 255 | self.last_incr = 0 256 | 257 | self.start_index = start_index 258 | self.monitor_dir = monitor_dir 259 | monitor_kwargs = {} if monitor_kwargs is None else monitor_kwargs 260 | 261 | def make_env_fn(rank): 262 | def _make_env(config): 263 | env_id = config["env_id"] 264 | env_kwargs = {k: v for k, v in config.items() if k != "env_id"} 265 | 266 | # Initialize the environment 267 | if isinstance(env_id, str): 268 | env = gym.make(env_id, render_mode=render_mode, **env_kwargs) 269 | else: 270 | env = env_id(**env_kwargs, render_mode=render_mode) 271 | 272 | # Optionally use the random seed provided 273 | if seed is not None: 274 | env.seed(seed + rank) 275 | env.action_space.seed(seed + rank) 276 | 277 | # Wrap the env in a Monitor wrapper 278 | # to have additional training information 279 | monitor_path = ( 280 | os.path.join(monitor_dir, str(rank)) 281 | if monitor_dir is not None 282 | else None 283 | ) 284 | # Create the monitor folder if needed 285 | if monitor_path is not None: 
286 | os.makedirs(monitor_path, exist_ok=True) 287 | env = Monitor(env, filename=monitor_path, **monitor_kwargs) 288 | 289 | # Wrap the environment with the provided wrappers 290 | for wrapper_cls, wrapper_kwargs in zip( 291 | wrappers, 292 | wrapper_kwargs_lst 293 | + [{}] * max(0, len(wrappers) - len(wrapper_kwargs_lst)), 294 | ): 295 | env = wrapper_cls(env, **wrapper_kwargs) 296 | 297 | return env 298 | 299 | def _init(): 300 | # Returns a list env with each env constructed from the config in env_configs 301 | return ListEnv([_make_env(config) for config in env_configs]) 302 | 303 | return _init 304 | 305 | env_fns = [make_env_fn(rank=i + start_index) for i in range(n_envs)] 306 | 307 | if start_method is None: 308 | import multiprocessing as mp 309 | start_method = "fork" if "fork" in mp.get_all_start_methods() else None 310 | 311 | super().__init__(env_fns=env_fns, start_method=start_method) 312 | 313 | def step(self, actions: np.ndarray) -> VecEnvStepReturn: 314 | """ 315 | Takes a step in the parallel environments. 316 | 317 | Args: 318 | actions (np.ndarray): Actions for each environment. 
319 | 320 | Returns: 321 | VecEnvStepReturn: The observations, rewards, dones, and infos from each environment 322 | """ 323 | observations, rewards, dones, infos = super().step(actions) 324 | # Increment total time steps 325 | self.total_time_steps += self.n_envs 326 | if self.total_time_steps - self.last_incr > self.novelty_step: 327 | self.last_incr = self.total_time_steps 328 | # Trigger the novelty if enough steps have passed 329 | novelty_injected = self.env_method("incr_env_idx") 330 | dones[:] = True 331 | 332 | if np.any(novelty_injected) and self.print_novelty_box: 333 | s = f"| Novelty Injected (on env {self.get_attr('env_idx')}) |" 334 | print("-" * len(s)) 335 | print(s) 336 | print("-" * len(s)) 337 | 338 | return observations, rewards, dones, infos 339 | -------------------------------------------------------------------------------- /novgrid/register_envs.py: -------------------------------------------------------------------------------- 1 | from gymnasium import Env 2 | from gymnasium.envs.registration import register 3 | import novgrid.envs as envs 4 | import inspect 5 | 6 | 7 | def register_novgrid_envs() -> None: 8 | """ 9 | Registers all the novgrid environments with gymnasium 10 | """ 11 | [ 12 | register(id=f"NovGrid-{name}", entry_point=f"novgrid.envs:{name}") 13 | for name, _ in inspect.getmembers( 14 | envs, lambda obj: inspect.isclass(obj) and issubclass(obj, Env) 15 | ) 16 | ] 17 | -------------------------------------------------------------------------------- /novgrid_old/__init__.py: -------------------------------------------------------------------------------- 1 | # Import the envs module so that envs register themselves 2 | import novgrid.envs 3 | 4 | # Import wrappers so it's accessible when installing with pip 5 | import novgrid.novelty_generation 6 | import novgrid 7 | -------------------------------------------------------------------------------- /novgrid_old/baselines/__init__.py: 
-------------------------------------------------------------------------------- 1 | import minigrid_novelty_generator -------------------------------------------------------------------------------- /novgrid_old/baselines/cnn_sample.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | import gym_minigrid # MUST BE IMPORTED TO SEE ENVIRONMENTS 4 | from gym_minigrid.wrappers import ImgObsWrapper 5 | import torch as th 6 | import wandb 7 | from wandb.integration.sb3 import WandbCallback 8 | 9 | from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor 10 | from stable_baselines3.common.vec_env.vec_transpose import VecTransposeImage 11 | from stable_baselines3 import PPO 12 | from stable_baselines3.common.callbacks import EvalCallback, CallbackList 13 | from stable_baselines3.common.env_util import make_vec_env 14 | 15 | from novgrid.utils.parser import getparser 16 | from novgrid.utils.novgrid_utils import make_env 17 | from novgrid.utils.baseline_utils import MinigridCNN 18 | from novgrid.novelty_generation.novelty_wrappers import * 19 | 20 | 21 | def main(args): 22 | if args.device: 23 | device = th.device(args.device) 24 | else: 25 | device = th.device('cuda' if th.cuda.is_available() else 'cpu') 26 | # Set up tracking and logging 27 | now = datetime.now() 28 | dt_string = now.strftime("%Y-%m-%d_%H-%M-%S") 29 | if args.saves_logs == 'logs': 30 | defaults = getparser([]) 31 | if defaults != args: 32 | logstr = '' 33 | for key, value in args.__dict__.items(): 34 | if defaults.__dict__[key] != value: 35 | elemstr = str(key) + '=' + str(value) + '_' 36 | logstr += elemstr 37 | else: 38 | logstr = args.saves_logs 39 | 40 | 41 | log_dir = os.path.abspath('./logs/' + logstr + '_' + dt_string) 42 | os.makedirs(log_dir) 43 | 44 | if args.wandb_track: 45 | wandb_config = { 46 | 'total_timesteps': args.total_timesteps, 47 | 'env_name': args.env, 48 | 'novelty_wrapper': 
args.novelty_wrapper, 49 | 'novelty_episode': args.novelty_episode, 50 | 'args': args 51 | } 52 | wandb.tensorboard.patch(root_logdir=log_dir, pytorch=True) 53 | wandb_run = wandb.init( 54 | project='novgrid_baselines', 55 | entity="balloch", 56 | settings=wandb.Settings(start_method="fork"), 57 | name=logstr + '_' + dt_string, 58 | dir='./logs/', 59 | config=wandb_config, 60 | sync_tensorboard=True, 61 | monitor_gym=True 62 | ) 63 | 64 | 65 | env_wrappers = [ImgObsWrapper] 66 | wrappers_args = [{}] 67 | 68 | n_envs = args.num_workers 69 | 70 | if args.novelty_wrapper: 71 | novelty_wrapper = eval(args.novelty_wrapper) 72 | env_wrappers = [novelty_wrapper] + env_wrappers 73 | wrappers_args.append({}) 74 | env_list = [make_env(env_name=args.env, 75 | wrappers=env_wrappers, 76 | wrapper_args=wrappers_args, 77 | novelty_episode=args.novelty_episode) for _ in range(n_envs)] 78 | env = VecMonitor(DummyVecEnv(env_list)) 79 | elif n_envs > 1: 80 | print('try make_vec_env') 81 | # This only works with a single wrapper for some reason. 
82 | env = make_vec_env(args.env, 83 | n_envs=n_envs, 84 | seed=0, 85 | wrapper_class=env_wrappers[0]) 86 | else: 87 | env_list = [make_env(env_name=args.env, 88 | wrappers=env_wrappers, 89 | novelty_episode=args.novelty_episode) for _ in range(args.num_workers)] 90 | env = VecMonitor(DummyVecEnv(env_list)) 91 | 92 | # Set up and create model 93 | policy_kwargs = dict( 94 | features_extractor_class=MinigridCNN, 95 | features_extractor_kwargs=dict(features_dim=128), ) 96 | model = PPO("CnnPolicy", 97 | env, 98 | policy_kwargs=policy_kwargs, 99 | learning_rate=args.learning_rate, 100 | verbose=1, 101 | tensorboard_log=log_dir, 102 | device=device) 103 | if args.load_model: 104 | print(f'loading model {args.load_model}') 105 | model.set_parameters(args.load_model) 106 | 107 | # Set up experiment callbacks 108 | eval_callback = EvalCallback( 109 | VecTransposeImage(env), 110 | best_model_save_path=log_dir, 111 | log_path=log_dir, 112 | eval_freq=round(args.eval_interval/n_envs), 113 | deterministic=True, 114 | render=False) 115 | callback_list = [eval_callback] 116 | 117 | if args.wandb_track: 118 | tracking_callback = WandbCallback( 119 | gradient_save_freq=10, 120 | model_save_path=wandb_run.dir, #'/datadrive/wandb_tmp/', 121 | model_save_freq=10000, 122 | verbose=2) 123 | callback_list.append(tracking_callback) 124 | # wandb.watch(sb_policy) 125 | 126 | all_callback = CallbackList(callback_list) 127 | 128 | # Run Experiments! 
129 | for exp in range(args.num_exp): 130 | model.learn( 131 | total_timesteps=args.total_timesteps, 132 | log_interval=args.log_interval, 133 | tb_log_name='run_{}'.format(exp), 134 | callback=all_callback, 135 | ) 136 | model.save(log_dir + '/' + 'run_{}'.format(exp) + '_final_model') 137 | 138 | 139 | if __name__ == "__main__": 140 | config_args = getparser() 141 | main(config_args) 142 | -------------------------------------------------------------------------------- /novgrid_old/baselines/models/ppo_minigrid_example_model.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eilab-gt/NovGrid/5873d2148b246ba9433307e8791ab794c0d7ca57/novgrid_old/baselines/models/ppo_minigrid_example_model.zip -------------------------------------------------------------------------------- /novgrid_old/baselines/ppo_minigrid.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | 4 | import gym_minigrid # MUST BE IMPORTED TO SEE ENVIRONMENTS 5 | from gym_minigrid.wrappers import FlatObsWrapper 6 | import torch as th 7 | from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor 8 | from stable_baselines3 import PPO 9 | 10 | from novgrid.utils.parser import getparser 11 | from novgrid.utils.novgrid_utils import make_env 12 | from novgrid.novelty_generation.novelty_wrappers import * 13 | 14 | device = th.device('cuda' if th.cuda.is_available() else 'cpu') 15 | 16 | 17 | def main(args): 18 | # Set up tracking 19 | now = datetime.now() 20 | dt_string = now.strftime("%d-%m-%Y_%H-%M-%S") 21 | log_dir = os.path.abspath('./logs/' + args.saves_logs + '_' + dt_string) 22 | os.makedirs(log_dir) 23 | 24 | # Create environments 25 | novelty_wrapper = eval(args.novelty_wrapper) 26 | env_wrappers = [novelty_wrapper, FlatObsWrapper] 27 | env_list = [make_env(args.env, log_dir, env_wrappers, args.novelty_episode) for _ in 
range(args.num_workers)] 28 | env = VecMonitor(DummyVecEnv(env_list)) 29 | 30 | # Set up and create model 31 | model = PPO("MlpPolicy", 32 | env, 33 | learning_rate=args.learning_rate, 34 | verbose=1, 35 | tensorboard_log=log_dir, 36 | device=device) 37 | if args.load_model: 38 | print(f'loading model {args.load_model}') 39 | model.set_parameters(args.load_model) 40 | 41 | for exp in range(args.num_exp): 42 | model.learn( 43 | total_timesteps=args.total_timesteps, 44 | tb_log_name='run_{}'.format(exp) 45 | ) 46 | model.save(log_dir + '/' + 'run_{}'.format(exp) + '_final_model') 47 | 48 | 49 | if __name__ == "__main__": 50 | config_args = getparser() 51 | main(config_args) 52 | -------------------------------------------------------------------------------- /novgrid_old/baselines/render_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | import novgrid 5 | import gym_minigrid 6 | import gym 7 | from PIL import Image 8 | from gym_minigrid.wrappers import * 9 | 10 | 11 | env_name = 'MiniGrid-LavaShortcutMaze8x8-v0' 12 | # env = RGBImgPartialObsWrapper(env) 13 | # env = ImgObsWrapper(env) 14 | 15 | 16 | env = gym.make(env_name) 17 | env.reset() 18 | outs = env.step(1) 19 | outs2 = env.step(1) 20 | outs3 = env.step(2) 21 | 22 | 23 | # Simple rendering 24 | img = Image.fromarray(env.render('rgb_array'),'RGB') 25 | img.show() 26 | 27 | # ## Video rendering with timing 28 | # t0 = time.time() 29 | # num_frames=5000 30 | # images = [] 31 | # for i in range(num_frames): 32 | # img = Image.fromarray(env.render('rgb_array'),'RGB') 33 | # images.append(img) 34 | # # img.show() 35 | # obs, reward, done, info = env.step(0) 36 | # images[0].save(env_name+'out.gif', 37 | # save_all=True, 38 | # append_images=images[1:], 39 | # optimize=False, 40 | # duration=40, 41 | # loop=0) 42 | # t1 = time.time() 43 | # dt = t1 - t0 44 | # frames_per_sec = num_frames / dt 45 | # 46 | # print('Rendering FPS : 
{:.0f}'.format(frames_per_sec))
47 | 
-------------------------------------------------------------------------------- /novgrid_old/env_generator.py: --------------------------------------------------------------------------------
1 | from typing import Optional, List, Union, Any, Tuple
2 | 
3 | import numpy as np
4 | 
5 | def generate_config_json(
6 |     base_env: str,
7 |     num_tasks: int = 0,
8 |     change_vars: Optional[List[str]] = None,
9 |     change_types: Optional[List[type]] = None,
10 |     change_ranges: Optional[List[Union[Tuple[Any, Any], None]]] = None,
11 | ):
12 |     """
13 |     base_env : str, the name of the env_id
14 |     num_tasks : the number of changes (so the number of resulting environments is num_tasks+1),
15 |     change_vars : the list of kwarg variable names to change,
16 |     change_types : the list of types of the kwarg variables to change, options 'bool', 'int', or 'float'
17 |     change_ranges : the ranges of the kwarg variables to change if type is not bool.
18 |         Each must be len == 2 or None for bool.
19 |         For an int will return the largest subinterval divisible by num_tasks.
20 | """ 21 | if change_ranges is not None: 22 | for r in change_ranges: 23 | assert r is None or len(r) == 2 24 | 25 | json_data = [] 26 | # for n in range(num_tasks): 27 | var_values = {} 28 | for idx, var in enumerate(change_vars): 29 | if change_types[idx] is bool: 30 | var_values[var] = [bool(i % 2) for i in range(num_tasks)] 31 | elif change_types[idx] is int: 32 | var_values[var] = [ 33 | val * (change_ranges[idx][1] - change_ranges[idx][0]) // num_tasks 34 | + change_ranges[idx][0] 35 | for val in range(num_tasks) 36 | ] 37 | elif change_types[idx] is float: 38 | var_values[var] = list(np.linspace(*change_ranges[idx]), num_tasks) 39 | else: 40 | raise TypeError 41 | for i in range(num_tasks): 42 | json_data.append( 43 | { 44 | "env_id": base_env, 45 | **dict(map(lambda x: (x[0], x[1][i]), var_values.items())), 46 | } 47 | ) 48 | return json_data 49 | 50 | 51 | def assert_value(ground_truth, value): 52 | try: 53 | assert ground_truth == value 54 | except: 55 | print("Test Failed!") 56 | print("Expected:", ground_truth) 57 | print("Received:", value) 58 | 59 | 60 | def test1(): 61 | case1 = [ 62 | {"env_id": "LavaGrid", "lava_on": False}, 63 | {"env_id": "LavaGrid", "lava_on": True}, 64 | {"env_id": "LavaGrid", "lava_on": False}, 65 | ] 66 | 67 | case2 = [ 68 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 1}, 69 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 2}, 70 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 3}, 71 | ] 72 | 73 | assert_value(case1, generate_config_json("LavaGrid", 3, ["lava_on"], [bool])) 74 | assert_value( 75 | case2, 76 | generate_config_json( 77 | "MiniGrid-SimpleCrossingS9N0-v0", 3, ["num_crossings"], [int], [(1, 4)] 78 | ), 79 | ) 80 | -------------------------------------------------------------------------------- /novgrid_old/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from novgrid.envs.multidoormultikey import * 2 | from 
novgrid.envs.lavagapdoorkey import * -------------------------------------------------------------------------------- /novgrid_old/envs/lavagapdoorkey.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | import numpy as np 4 | 5 | MAXIMUM_SIZE_F = 10 # Gap needs only 4, when door key involved make it 10. 6 | 7 | 8 | class LavaGapDoorKeyEnv(MiniGridEnv): 9 | """ 10 | Environment with a door and key with one wall of lava with a small gap to cross through 11 | sparse reward 12 | """ 13 | def __init__(self, size, obstacle_type=Lava, seed=None): 14 | self.obstacle_type = obstacle_type 15 | self.fixed_env = False 16 | self.simple_reward = False 17 | self.no_door_key = False # True for gap. 18 | 19 | super().__init__( 20 | grid_size=size, 21 | max_steps=MAXIMUM_SIZE_F*size*size, 22 | # Set this to True for maximum speed 23 | see_through_walls=False, 24 | seed=seed 25 | ) 26 | 27 | def _gen_grid(self, width, height): 28 | # Create an empty grid 29 | self.grid = Grid(width, height) 30 | 31 | # Generate the surrounding walls 32 | self.grid.wall_rect(0, 0, width, height) 33 | 34 | # Place a goal in the bottom-right corner 35 | self.put_obj(Goal(), width - 2, height - 2) 36 | 37 | if self.fixed_env: 38 | # Create a vertical splitting wall 39 | splitIdx = width//2 - 1 40 | self.gap_pos = np.array((2,3)) 41 | self.grid.vert_wall(splitIdx+3, 2, width // 3, self.obstacle_type) 42 | doorIdx = height//2 - 1 43 | 44 | else: 45 | # Create a vertical splitting wall 46 | splitIdx = self._rand_int(2, width-2) 47 | # Place the obstacle wall 48 | if splitIdx > width // 3 + 1: 49 | self.gap_pos = np.array(( 50 | self._rand_int(1, width // 3), 51 | self._rand_int(1, height - 2), 52 | )) 53 | self.grid.horz_wall(self.gap_pos[0], self.gap_pos[1], width // 3, self.obstacle_type) 54 | else: 55 | self.gap_pos = np.array(( 56 | self._rand_int(1, width - 2), 57 | 
self._rand_int(1, height // 3), 58 | )) 59 | self.grid.vert_wall(self.gap_pos[0], self.gap_pos[1], height // 3, self.obstacle_type) 60 | doorIdx = self._rand_int(1, height - 2) 61 | # Place a door in the wall 62 | while abs(doorIdx - self.gap_pos[1]) < 2: 63 | doorIdx = self._rand_int(1, height-2) 64 | 65 | 66 | self.grid.vert_wall(splitIdx, 0) 67 | if self.no_door_key: 68 | # Put a hole in the wall 69 | self.grid.set(splitIdx, doorIdx, None) 70 | else: 71 | self.put_obj(Door('yellow', is_locked=True), splitIdx, doorIdx) 72 | if self.fixed_env: 73 | self.put_obj(Key('yellow'), 2, 4) 74 | else: 75 | # Place a yellow key on the left side 76 | self.place_obj( 77 | obj=Key('yellow'), 78 | top=(0, 0), 79 | size=(splitIdx, height) 80 | ) 81 | 82 | # Place the agent at a random position and orientation 83 | # on the left side of the splitting wall 84 | self.place_agent(size=(splitIdx, height)) 85 | 86 | self.mission = "Avoid the lava and use the key to open the door and then get to the goal" 87 | 88 | def _reward(self): 89 | """ 90 | Compute the reward to be given upon success 91 | """ 92 | agent_pos = self.agent_pos 93 | object = self.grid.get(agent_pos[0], agent_pos[1]) 94 | if (object.type == 'lava') and (): 95 | return -1 # Add Negative reward for stepping on Lava. 
96 | 97 | if self.simple_reward: 98 | return 1 99 | else: 100 | return (1 - 0.9 * (self.step_count / self.max_steps)) * 10 101 | 102 | 103 | class LavaShortcutMaze(MiniGridEnv): 104 | """ 105 | Environment with a door and key with one wall of lava with a small gap to cross through 106 | sparse reward 107 | """ 108 | def __init__(self, size, obstacle_type=Lava, seed=None): 109 | self.obstacle_type = obstacle_type 110 | self.fixed_env = True 111 | self.simple_reward = False 112 | 113 | super().__init__( 114 | grid_size=size, 115 | max_steps=MAXIMUM_SIZE_F*size*size, 116 | # Set this to True for maximum speed 117 | see_through_walls=False, 118 | seed=seed 119 | ) 120 | 121 | def _gen_grid(self, width, height): 122 | # Create an empty grid 123 | self.grid = Grid(width, height) 124 | 125 | # Generate the surrounding walls 126 | self.grid.wall_rect(0, 0, width, height) 127 | 128 | # first vertical walls 129 | first_wall_width = 2 130 | splitIdx = width//2 131 | self.grid.vert_wall(first_wall_width, 2, height-3) 132 | 133 | if width > 6: 134 | for extra_wall_pos in range(1,(width-5)//2+1): 135 | if extra_wall_pos % 2 == 0: 136 | self.grid.vert_wall(first_wall_width+extra_wall_pos*2, 2, height - 3) 137 | # Place a goal in the bottom-right corner 138 | # self.put_obj(Goal(), width - 2, height - 2) 139 | else: 140 | self.grid.vert_wall(first_wall_width+extra_wall_pos*2, 0, height - 3) 141 | # Place a goal in the top-right corner 142 | # self.put_obj(Goal(), width - 2, 1) 143 | else: 144 | pass 145 | # Place a goal in the bottom-right corner 146 | self.put_obj(Goal(), width - 2, height - 2) 147 | 148 | # Create a horizontal lava 149 | self.grid.horz_wall(2, height-2, width-4, Lava) 150 | 151 | # Place the agent at a fixed bottom left position 152 | # and random orientation 153 | self.place_agent(top=(0, height-2), 154 | size=(first_wall_width, height)) 155 | 156 | self.mission = "Avoid the lava and use the key to open the door and then get to the goal" 157 | 158 | def 
_reward(self): 159 | """ 160 | Compute the reward to be given upon success 161 | """ 162 | agent_pos = self.agent_pos 163 | object = self.grid.get(agent_pos[0], agent_pos[1]) 164 | if object.type == "lava": 165 | return -1 # Add Negative reward for stepping on Lava. 166 | 167 | if self.simple_reward: 168 | return 1 169 | else: 170 | return (1 - 0.9 * (self.step_count / self.max_steps)) # * 10 171 | 172 | class LavaSafeMaze8x8(LavaShortcutMaze): 173 | def __init__(self): 174 | super().__init__(size=8) 175 | 176 | def _reward(self): 177 | """ 178 | Compute the reward to be given upon success 179 | """ 180 | # agent_pos = self.agent_pos 181 | # object = self.grid.get(agent_pos[0], agent_pos[1]) 182 | # if object.type == "lava": 183 | # return -1 # NO Negative reward for stepping on Lava. 184 | 185 | if self.simple_reward: 186 | return 1 187 | else: 188 | return (1 - 0.9 * (self.step_count / self.max_steps)) # * 10 189 | 190 | def step(self, action, **kwargs): 191 | fwd_pos = self.front_pos 192 | fwd_cell = self.grid.get(*fwd_pos) 193 | obs, reward, done, info = super().step(action) 194 | if done and fwd_cell and fwd_cell.type == 'lava': 195 | self.agent_pos = fwd_pos 196 | obs = self.gen_obs() 197 | done = False 198 | return obs, reward, done, info 199 | 200 | 201 | 202 | class LavaGapDoorKeyEnv5x5(LavaGapDoorKeyEnv): 203 | def __init__(self): 204 | super().__init__(size=5) 205 | 206 | 207 | class LavaGapDoorKeyEnv6x6(LavaGapDoorKeyEnv): 208 | def __init__(self): 209 | super().__init__(size=6) 210 | 211 | 212 | class LavaGapDoorKeyEnv8x8(LavaGapDoorKeyEnv): 213 | def __init__(self): 214 | super().__init__(size=8) 215 | 216 | 217 | class LavaGapDoorKeyEnv16x16(LavaGapDoorKeyEnv): 218 | def __init__(self): 219 | super().__init__(size=16) 220 | 221 | 222 | class LavaShortcutMaze6x6(LavaShortcutMaze): 223 | def __init__(self): 224 | super().__init__(size=6) 225 | 226 | 227 | class LavaShortcutMaze7x7(LavaShortcutMaze): 228 | def __init__(self): 229 | 
super().__init__(size=7) 230 | 231 | 232 | class LavaShortcutMaze8x8(LavaShortcutMaze): 233 | def __init__(self): 234 | super().__init__(size=8) 235 | 236 | 237 | class LavaShortcutMaze9x9(LavaShortcutMaze): 238 | def __init__(self): 239 | super().__init__(size=9) 240 | 241 | 242 | 243 | # register( 244 | # id='MiniGrid-LavaGapDoorKeyEnv5x5-v0', 245 | # entry_point='novgrid.envs:LavaGapDoorKeyEnv5x5' 246 | # ) 247 | # print("hello") 248 | 249 | register( 250 | id='MiniGrid-LavaGapDoorKeyEnv6x6-v0', 251 | entry_point='novgrid.envs:LavaGapDoorKeyEnv6x6' 252 | ) 253 | 254 | register( 255 | id='MiniGrid-LavaGapDoorKeyEnv8x8-v0', 256 | entry_point='novgrid.envs:LavaGapDoorKeyEnv8x8' 257 | ) 258 | 259 | register( 260 | id='MiniGrid-LavaGapDoorKeyEnv16x16-v0', 261 | entry_point='novgrid.envs:LavaGapDoorKeyEnv16x16' 262 | ) 263 | 264 | ###### 265 | 266 | register( 267 | id='MiniGrid-LavaShortcutMaze6x6-v0', 268 | entry_point='novgrid.envs:LavaShortcutMaze6x6' 269 | ) 270 | 271 | register( 272 | id='MiniGrid-LavaShortcutMaze7x7-v0', 273 | entry_point='novgrid.envs:LavaShortcutMaze7x7' 274 | ) 275 | 276 | register( 277 | id='MiniGrid-LavaShortcutMaze8x8-v0', 278 | entry_point='novgrid.envs:LavaShortcutMaze8x8' 279 | ) 280 | 281 | register( 282 | id='MiniGrid-LavaShortcutMaze9x9-v0', 283 | entry_point='novgrid.envs:LavaShortcutMaze9x9' 284 | ) 285 | 286 | register( 287 | id='MiniGrid-LavaSafeMaze8x8-v0', 288 | entry_point='novgrid.envs:LavaSafeMaze8x8' 289 | ) 290 | -------------------------------------------------------------------------------- /novgrid_old/envs/multidoormultikey.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.envs import DoorKeyEnv 2 | from gym_minigrid.register import register 3 | from gym_minigrid.minigrid import Key, Grid, Door, Goal, COLORS 4 | from matplotlib.pyplot import grid 5 | import numpy as np 6 | 7 | 8 | class MultiDoorMultiKeyEnv(DoorKeyEnv): 9 | def __init__(self, size=6, doors=1, 
keys=1, determ=False, seed=13, locked=None): 10 | if (doors > (size - 3)) or (keys > (size - 3)): 11 | raise ValueError("Both doors:{} and keys:{} must be less than size-3:{}".format(doors, keys, size)) 12 | elif doors > 6 or keys > 6: 13 | raise ValueError("Both doors:{} and keys:{} must be less than 6".format(doors, keys)) 14 | self.doors = doors 15 | self.keys = keys 16 | self.seed_value = seed 17 | self.determ = determ 18 | if self.determ: 19 | rand_num_gen = np.random.default_rng(self.seed_value) 20 | self.door_idxs = rand_num_gen.choice(size - 3, size=self.doors, replace=False) + 1 21 | self.key_widths = rand_num_gen.choice(size, size=self.keys) 22 | self.key_heights = rand_num_gen.choice(size, size=self.keys) 23 | self.split_idx = rand_num_gen.integers(low=2, high=size - 2) 24 | super().__init__(size=size) 25 | 26 | def _gen_grid(self, width, height): 27 | # Create an empty grid 28 | self.grid = Grid(width, height) 29 | 30 | # Generate the surrounding walls 31 | self.grid.wall_rect(0, 0, width, height) 32 | 33 | # Place a goal in the bottom-right corner 34 | self.put_obj(Goal(), width - 2, height - 2) 35 | 36 | # Create a vertical splitting wall 37 | if self.determ: 38 | split_idx = self.split_idx 39 | else: 40 | split_idx = self._rand_int(2, width - 2) 41 | self.grid.vert_wall(split_idx, 0) 42 | 43 | # Place the agent at a random position and orientation 44 | # on the left side of the splitting wall 45 | self.place_agent(size=(split_idx, height)) 46 | 47 | ## Place doors and keys 48 | ## Warning: for Python < 3.5 dict order is non-deterministic 49 | colors = list(COLORS.keys()) 50 | rand_num_gen = np.random.default_rng(self.seed_value) 51 | # place_obj drops the object randomly in a rectangle 52 | # put_obj puts an object in a specific place 53 | for door in range(self.doors): 54 | if self.determ: 55 | door_idx = self.door_idxs[door] 56 | else: 57 | door_idx = None 58 | while not door_idx or isinstance(self.grid.get(split_idx, door_idx), Door): 59 | 
door_idx = rand_num_gen.choice(height - 3) + 1 60 | self.put_obj(Door(colors[door], is_locked=True), split_idx, door_idx) 61 | 62 | for key in range(self.keys): 63 | if self.determ: 64 | self.put_obj(Key(colors[key]), self.key_widths[key], self.key_heights[key]) 65 | self.place_obj(obj=Key(colors[key]), top=(0, 0), size=(split_idx, height)) 66 | 67 | self.mission = "use the key to open the same color door and then get to the goal" 68 | 69 | class DoorMultiKeyEnv5x5(DoorKeyEnv): 70 | def __init__(self): 71 | super().__init__(size=5, doors=2, keys=2) 72 | 73 | class DoorMultiKeyEnv6x6(DoorKeyEnv): 74 | def __init__(self): 75 | super().__init__(size=6, doors=2, keys=2) 76 | 77 | class DoorMultiKeyEnv16x16(DoorKeyEnv): 78 | def __init__(self): 79 | super().__init__(size=16, doors=2, keys=2) 80 | 81 | register( 82 | id='MiniGrid-DoorMultiKey-5x5-v0', 83 | entry_point='novgrid.envs:DoorMultiKeyEnvEnv5x5' 84 | ) 85 | 86 | register( 87 | id='MiniGrid-DoorMultiKey-6x6-v0', 88 | entry_point='novgrid.envs:DoorMultiKeyEnv6x6' 89 | ) 90 | 91 | register( 92 | id='MiniGrid-DoorMultiKey-16x16-v0', 93 | entry_point='novgrid.envs:DoorMultiKeyEnv16x16' 94 | ) 95 | -------------------------------------------------------------------------------- /novgrid_old/novelty_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eilab-gt/NovGrid/5873d2148b246ba9433307e8791ab794c0d7ca57/novgrid_old/novelty_generation/__init__.py -------------------------------------------------------------------------------- /novgrid_old/novelty_generation/novelty_objs.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import Key, Door 2 | 3 | 4 | class ColorDoor(Door): 5 | """ 6 | A Door instance where the key color can be specified and doesn't have to match the door 7 | """ 8 | def __init__(self, color, is_open=False, is_locked=False, key_color=None): 9 | 
super().__init__(color, is_open, is_locked) 10 | self.is_open = is_open 11 | self.is_locked = is_locked 12 | if key_color: 13 | self.key_color = key_color 14 | else: 15 | self.key_color = color 16 | 17 | def toggle(self, env, pos): 18 | # If the player has the right key to open the door 19 | if self.is_locked: 20 | if isinstance(env.carrying, Key) and env.carrying.color == self.key_color: 21 | self.is_locked = False 22 | self.is_open = True 23 | return True 24 | return False 25 | 26 | self.is_open = not self.is_open 27 | return True 28 | 29 | 30 | class MultiKeyDoor(Door): 31 | """ 32 | A Door instance where multiple keys are required to unlock the door 33 | """ 34 | def __init__(self, color, is_open=False, is_locked=False, key_colors=None): 35 | super().__init__(color, is_open, is_locked) 36 | self.is_open = is_open 37 | self.is_locked = is_locked 38 | if key_colors: 39 | self.key_colors = key_colors 40 | else: 41 | self.key_colors = color 42 | 43 | 44 | def toggle(self, env, pos): 45 | if self.is_locked: 46 | if isinstance(env.carrying, Key) and env.carrying.color in self.key_colors: 47 | self.key_colors.remove(env.carrying.color) 48 | if len(self.key_colors) == 0: 49 | self.is_locked = False 50 | self.is_open = True 51 | return True 52 | return False 53 | 54 | self.is_open = not self.is_open 55 | return True -------------------------------------------------------------------------------- /novgrid_old/novelty_generation/novelty_wrappers.py: -------------------------------------------------------------------------------- 1 | # change the self.mission 2 | # you should be able to specify the exact novelty AND that there should be a random novelty 3 | import abc 4 | import gym 5 | import numpy as np 6 | 7 | from .novelty_objs import ColorDoor, MultiKeyDoor 8 | from gym_minigrid.minigrid import Key, Grid, Door, Goal 9 | 10 | 11 | class NoveltyWrapper(gym.core.Wrapper): 12 | """ 13 | Wrapper to modify the environment according to novelty ontology at a certain point 14 
| If novelty_episode = 0 (default) then there is no novelty 15 | This make the assumption--which is valid for all standard Minigrid environments as of 2021 16 | that MiniGrid environments do not ever overload the `reset()` gym function. So, we will assume 17 | that all relevant novelties are to be implemented in the `_post_novelty_gen_grid` function, 18 | which is called by `_post_novelty_reset` after the novelty episode is reached. 19 | """ 20 | 21 | def __init__(self, env, novelty_episode=-1, novelty_step=-1): 22 | super().__init__(env) 23 | ## ensure that one and only one of novelty_step or novelty_episode is used 24 | assert novelty_episode > 0 or novelty_step > 0 25 | assert not (novelty_episode > 0 and novelty_step > 0) 26 | self.novelty_episode = novelty_episode 27 | self.novelty_step = novelty_step 28 | if novelty_step != -1: 29 | self.novelty_flag = 'step' 30 | else: 31 | self.novelty_flag = 'episode' 32 | self.num_episodes = 0 33 | self.num_steps = 0 34 | self.post_novelty = False 35 | 36 | def reset(self, **kwargs): 37 | # don't count resets that have no steps 38 | if self.unwrapped.step_count: 39 | self.num_episodes += 1 40 | self.num_steps += self.unwrapped.step_count 41 | # if episode matches, inject novelty and record step size 42 | if self.num_episodes >= self.novelty_episode: 43 | if self.post_novelty is False: 44 | print('############################') 45 | print('##### Novelty Injected #####') 46 | print(f'##### Step {self.num_steps} #####') 47 | print('############################') 48 | self.novelty_step = self.num_steps 49 | self.post_novelty = True 50 | # self.env.reset(**kwargs) 51 | return self._post_novelty_reset(**kwargs) 52 | else: 53 | return self.env.reset(**kwargs) 54 | 55 | def _post_novelty_reset(self, **kwargs): 56 | # Current position and direction of the agent 57 | #todo all this should be unwrapped 58 | self.env.agent_pos = None 59 | self.env.agent_dir = None 60 | 61 | # Generate a new random grid at the start of each episode 62 
| # To keep the same grid for each episode, call env.seed() with 63 | # the same seed before calling env.reset() 64 | self._post_novelty_gen_grid(self.width, self.height, **kwargs) 65 | 66 | # These fields should be defined by _gen_grid 67 | assert self.env.agent_pos is not None 68 | assert self.env.agent_dir is not None 69 | 70 | # Check that the agent doesn't overlap with an object 71 | start_cell = self.env.grid.get(*self.env.agent_pos) 72 | assert start_cell is None or start_cell.can_overlap() 73 | 74 | # Item picked up, being carried, initially nothing 75 | self.env.carrying = None 76 | 77 | # Step count since episode start 78 | self.env.step_count = 0 79 | 80 | # Return first observation 81 | obs = self.env.gen_obs() 82 | return obs 83 | 84 | # @abc.abstractmethod 85 | def _post_novelty_gen_grid(self, width, height): 86 | """ 87 | This is the main function where you implement the novelty 88 | """ 89 | return self.unwrapped._gen_grid(width, height) 90 | # raise NotImplementedError 91 | 92 | def _rand_int(self, low, high): 93 | return self.env.np_random.randint(low, high) 94 | 95 | 96 | class DoorKeyChange(NoveltyWrapper): 97 | 98 | def __init__(self, env, novelty_episode): 99 | super().__init__(env, novelty_episode) 100 | 101 | def _post_novelty_gen_grid(self, width, height): 102 | # Create an empty grid 103 | self.env.grid = Grid(width, height) 104 | 105 | # Generate the surrounding walls 106 | self.env.grid.wall_rect(0, 0, width, height) 107 | 108 | # Place a goal in the bottom-right corner 109 | self.env.put_obj(Goal(), width - 2, height - 2) 110 | 111 | # Create a vertical splitting wall 112 | splitIdx = self._rand_int(2, width - 2) 113 | self.env.grid.vert_wall(splitIdx, 0) 114 | 115 | # Place the agent at a random position and orientation 116 | # on the left side of the splitting wall 117 | self.env.place_agent(size=(splitIdx, height)) 118 | 119 | # Place a door in the wall 120 | doorIdx = self._rand_int(1, width-2) 121 | # Yellow door object that will 
open when toggled with a blue key 122 | self.env.put_obj(ColorDoor('yellow', is_locked=True, key_color='blue'), splitIdx, doorIdx) 123 | 124 | # Place a yellow key on the left side 125 | self.env.place_obj( 126 | obj=Key('yellow'), 127 | top=(0, 0), 128 | size=(splitIdx, height) 129 | ) 130 | 131 | # Place a blue key on the left side 132 | self.env.place_obj( 133 | obj=Key('blue'), 134 | top=(0, 0), 135 | size=(splitIdx, height) 136 | ) 137 | 138 | self.env.mission = "use different color key to open the door and then get to the goal" 139 | 140 | 141 | class DoorLockToggle(NoveltyWrapper): 142 | 143 | def __init__(self, env, novelty_episode): 144 | super().__init__(env, novelty_episode) 145 | 146 | def _post_novelty_gen_grid(self, width, height): 147 | # Create an empty grid 148 | self.env.grid = Grid(width, height) 149 | 150 | # Generate the surrounding walls 151 | self.env.grid.wall_rect(0, 0, width, height) 152 | 153 | # Place a goal in the bottom-right corner 154 | self.env.put_obj(Goal(), width - 2, height - 2) 155 | 156 | # Create a vertical splitting wall 157 | splitIdx = self._rand_int(2, width - 2) 158 | self.env.grid.vert_wall(splitIdx, 0) 159 | 160 | # Place the agent at a random position and orientation 161 | # on the left side of the splitting wall 162 | self.env.place_agent(size=(splitIdx, height)) 163 | 164 | # Place a door in the wall 165 | doorIdx = self._rand_int(1, width - 2) 166 | # Yellow door object that is already unlocked 167 | self.env.put_obj(Door('yellow', is_locked=False), splitIdx, doorIdx) 168 | 169 | # Place a yellow key on the left side 170 | self.env.place_obj( 171 | obj=Key('yellow'), 172 | top=(0, 0), 173 | size=(splitIdx, height) 174 | ) 175 | 176 | self.env.mission = "go through the unlocked door and then get to the goal" 177 | 178 | 179 | class DoorNumKeys(NoveltyWrapper): 180 | 181 | def __init__(self, env, novelty_episode): 182 | super().__init__(env, novelty_episode) 183 | 184 | def _post_novelty_gen_grid(self, width, 
height): 185 | # Create an empty grid 186 | self.env.grid = Grid(width, height) 187 | 188 | # Generate the surrounding walls 189 | self.env.grid.wall_rect(0, 0, width, height) 190 | 191 | # Place a goal in the bottom-right corner 192 | self.env.put_obj(Goal(), width - 2, height - 2) 193 | 194 | # Create a vertical splitting wall 195 | splitIdx = self._rand_int(3, width - 2) 196 | self.env.grid.vert_wall(splitIdx, 0) 197 | 198 | # Place the agent at a random position and orientation 199 | # on the left side of the splitting wall 200 | self.env.place_agent(size=(splitIdx, height)) 201 | 202 | # Place a door in the wall 203 | doorIdx = self._rand_int(1, width-2) 204 | # Yellow door that requires a yellow key and a blue key to be opened 205 | self.env.put_obj(MultiKeyDoor( 206 | 'yellow', 207 | is_locked=True, 208 | key_colors=['yellow', 'blue']), 209 | splitIdx, doorIdx) 210 | 211 | # Place a yellow key on the left side 212 | self.env.place_obj( 213 | obj=Key('yellow'), 214 | top=(0, 0), 215 | size=(splitIdx, height) 216 | ) 217 | 218 | # Place a blue key on the left side 219 | self.env.place_obj( 220 | obj=Key('blue'), 221 | top=(0, 0), 222 | size=(splitIdx, height) 223 | ) 224 | 225 | self.env.mission = "use two keys to open the door and then get to the goal" 226 | 227 | 228 | class GoalLocationChange(NoveltyWrapper): 229 | 230 | def __init__(self, env, novelty_episode): 231 | super().__init__(env, novelty_episode) 232 | 233 | def _post_novelty_gen_grid(self, width, height): 234 | # Create an empty grid 235 | self.env.grid = Grid(width, height) 236 | 237 | # Generate the surrounding walls 238 | self.env.grid.wall_rect(0, 0, width, height) 239 | 240 | # Changes the location of the goal from the bottom-right corner to the top-right corner 241 | self.env.put_obj(Goal(), width - 2, 1) 242 | 243 | # Create a vertical splitting wall 244 | splitIdx = self._rand_int(2, width-2) 245 | self.env.grid.vert_wall(splitIdx, 0) 246 | 247 | # Place the agent at a random position and 
orientation 248 | # on the left side of the splitting wall 249 | self.env.place_agent(size=(splitIdx, height)) 250 | 251 | # Place a door in the wall 252 | doorIdx = self._rand_int(1, width-2) 253 | self.env.put_obj(Door('yellow', is_locked=True), splitIdx, doorIdx) 254 | 255 | # Place a yellow key on the left side 256 | self.env.place_obj( 257 | obj=Key('yellow'), 258 | top=(0, 0), 259 | size=(splitIdx, height) 260 | ) 261 | 262 | self.env.mission = "use the key to open the door and then get to the goal whose location has changed" 263 | 264 | 265 | class ImperviousToLava(NoveltyWrapper): 266 | 267 | def __init__(self, env, novelty_episode): 268 | super().__init__(env, novelty_episode) 269 | 270 | # def reset(self, **kwargs): 271 | # self.num_episodes += 1 272 | # return self.env.reset(**kwargs) 273 | 274 | def step(self, action, **kwargs): 275 | if self.post_novelty: 276 | fwd_pos = self.env.front_pos 277 | fwd_cell = self.env.grid.get(*fwd_pos) 278 | obs, reward, done, info = self.env.step(action, **kwargs) 279 | if done and fwd_cell and fwd_cell.type == 'lava': 280 | self.env.agent_pos = fwd_pos 281 | obs = self.env.gen_obs()['image'] 282 | done = False 283 | return obs, reward, done, info 284 | return self.env.step(action, **kwargs) 285 | 286 | 287 | class LavaHurts(NoveltyWrapper): 288 | """ 289 | for an environment where lava doesn't hurt already 290 | """ 291 | def __init__(self, env, novelty_episode): 292 | super().__init__(env, novelty_episode) 293 | 294 | # def reset(self, **kwargs): 295 | # self.num_episodes += 1 296 | # return self.env.reset(**kwargs) 297 | 298 | def step(self, action, **kwargs): 299 | if self.post_novelty: 300 | fwd_pos = self.env.front_pos 301 | fwd_cell = self.env.grid.get(*fwd_pos) 302 | obs, reward, done, info = self.env.step(action, **kwargs) 303 | if fwd_cell and fwd_cell.type == 'lava': 304 | self.env.agent_pos = fwd_pos 305 | obs = self.env.gen_obs()['image'] 306 | done = True 307 | return obs, reward, done, info 308 | return 
self.env.step(action, **kwargs) 309 | 310 | 311 | class ForwardMovementSpeed(NoveltyWrapper): 312 | 313 | def __init__(self, env, novelty_episode): 314 | super().__init__(env, novelty_episode) 315 | 316 | # def reset(self, **kwargs): 317 | # self.num_episodes += 1 318 | # return self.env.reset(**kwargs) 319 | 320 | def step(self, action, **kwargs): 321 | if self.post_novelty: 322 | if action == self.env.actions.forward: 323 | obs, reward, done, info = self.env.step(action, **kwargs) 324 | if done: 325 | return obs, reward, done, info 326 | self.env.step_count -= 1 327 | return self.env.step(action, **kwargs) 328 | 329 | 330 | class ActionReptition(NoveltyWrapper): 331 | 332 | def __init__(self, env, novelty_episode): 333 | super().__init__(env, novelty_episode) 334 | self.prev_action = None 335 | 336 | # def reset(self, **kwargs): 337 | # self.num_episodes += 1 338 | # return self.env.reset(**kwargs) 339 | 340 | def step(self, action, **kwargs): 341 | if self.post_novelty: 342 | if action != self.prev_action: 343 | self.prev_action = action 344 | return self.env.step(self.env.actions.done) 345 | self.prev_action = None 346 | return self.env.step(action, **kwargs) 347 | 348 | 349 | class ActionRadius(NoveltyWrapper): 350 | 351 | def __init__(self, env, novelty_episode): 352 | super().__init__(env, novelty_episode) 353 | 354 | # def reset(self, **kwargs): 355 | # self.num_episodes += 1 356 | # return self.env.reset(**kwargs) 357 | 358 | def step(self, action, **kwargs): 359 | if self.post_novelty: 360 | obs, reward, done, info = self.env.step(action, **kwargs) 361 | if action == self.env.actions.pickup and self.env.carrying is None: 362 | agent_pos = self.env.agent_pos 363 | self.env.step(self.env.actions.forward, **kwargs) 364 | self.env.step(action, **kwargs) 365 | self.env.agent_pos = agent_pos 366 | self.env.step_count -= 2 367 | obs = self.env.gen_obs() 368 | return obs, reward, done, info 369 | return self.env.step(action, **kwargs) 370 | 371 | 372 | class 
class ColorRestriction(NoveltyWrapper):
    """Novelty wrapper that makes the yellow key useless after novelty onset.

    Post-novelty, the grid is regenerated with a vertical wall holding two
    locked doors (yellow and blue) and both matching keys on the agent's
    side; any attempt to pick up the yellow key is converted into the no-op
    ``done`` action, so only the blue key/door route can solve the task.
    """

    def __init__(self, env, novelty_episode):
        super().__init__(env, novelty_episode)

    def _post_novelty_gen_grid(self, width, height):
        """Build the two-door / two-key layout used after novelty injection."""
        # Create an empty grid and generate the surrounding walls
        self.env.grid = Grid(width, height)
        self.env.grid.wall_rect(0, 0, width, height)

        # Place a goal in the bottom-right corner
        self.env.put_obj(Goal(), width - 2, height - 2)

        # Create a vertical splitting wall
        splitIdx = self._rand_int(2, width - 2)
        self.env.grid.vert_wall(splitIdx, 0)

        # Place the agent at a random position and orientation
        # on the left side of the splitting wall
        self.env.place_agent(size=(splitIdx, height))

        # Place the yellow door in the wall. The door's second coordinate is a
        # row index into a wall of length ``height``, so it must be drawn from
        # [1, height - 2); the previous code used ``width - 2``, which is only
        # correct for square grids and can index the border (or beyond) otherwise.
        doorIdx = self._rand_int(1, height - 2)
        self.env.put_obj(Door('yellow', is_locked=True), splitIdx, doorIdx)

        # Place the blue door in a distinct cell of the same wall
        doorIdx = self._rand_int(1, height - 2)
        while isinstance(self.env.grid.get(splitIdx, doorIdx), Door):
            doorIdx = self._rand_int(1, height - 2)
        self.env.put_obj(Door('blue', is_locked=True), splitIdx, doorIdx)

        # Place a yellow key on the left side
        self.env.place_obj(
            obj=Key('yellow'),
            top=(0, 0),
            size=(splitIdx, height)
        )

        # Place a blue key on the left side
        self.env.place_obj(
            obj=Key('blue'),
            top=(0, 0),
            size=(splitIdx, height)
        )

        self.env.mission = "use blue key to open the blue door and then get to the goal"

    def step(self, action, **kwargs):
        """Step the env; post-novelty, yellow-key pickups are refused.

        When the agent faces the yellow key and issues ``pickup``, the action
        is replaced with the no-op ``done`` action so the key stays in place.
        """
        if self.post_novelty:
            if action == self.env.actions.pickup:
                fwd_pos = self.env.front_pos
                fwd_cell = self.env.grid.get(*fwd_pos)
                if fwd_cell and fwd_cell.can_pickup() and fwd_cell.color == 'yellow':
                    return self.env.step(self.env.actions.done, **kwargs)
        return self.env.step(action, **kwargs)
class MinigridCNN(BaseFeaturesExtractor):
    """
    CNN feature extractor for minigrid observations.

    :param observation_space: channels-first image observation space
    :param features_dim: size of the extracted feature vector
        (number of units in the final linear layer)
    """

    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 64):
        super().__init__(observation_space, features_dim)
        # Observations are assumed CxHxW (channels first); any re-ordering
        # is handled upstream by preprocessing or a wrapper.
        in_channels = observation_space.shape[0]
        conv_out_channels = 64
        self.cnn = nn.Sequential(
            nn.Conv2d(in_channels, 16, (2, 2)),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
            nn.Conv2d(16, 32, (2, 2)),
            nn.ReLU(),
            nn.Conv2d(32, conv_out_channels, (2, 2)),
            nn.ReLU(),
            nn.Flatten(),
        )
        # Discover the flattened size with a single dummy forward pass
        with th.no_grad():
            sample = th.as_tensor(observation_space.sample()[None]).float()
            n_flatten = self.cnn(sample).shape[1]

        self.linear = nn.Sequential(nn.Linear(n_flatten, features_dim), nn.ReLU())

    def forward(self, observations: th.Tensor) -> th.Tensor:
        return self.linear(self.cnn(observations))
def make_env(env_name, wrappers=None, wrapper_args=None, novelty_episode=-1):
    '''
    Build a zero-argument factory that creates and wraps an environment.
    The vectorization helpers expect a list of such factories.

    Parameters
    ----------
    env_name : str
        Name of the environment
    wrappers : list of wrapper classes, optional
        Wrapper classes applied to the environment, in order
    wrapper_args : list of dicts, optional
        One kwargs dict per wrapper, matched by index
    novelty_episode : int
        Episode number for novelty generation. To be deprecated

    Returns
    -------
    callable
        Zero-argument function returning the fully wrapped environment.
    '''
    if wrappers is None:
        wrappers = []
    # Validate up front (fail fast) and normalize to one kwargs dict per
    # wrapper, which lets a single loop below handle both call shapes that
    # were previously duplicated across two nearly identical branches.
    if wrapper_args is not None:
        assert len(wrapper_args) == len(wrappers)
        per_wrapper_kwargs = wrapper_args
    else:
        per_wrapper_kwargs = [{} for _ in wrappers]

    def _init():
        env = gym.make(env_name)
        for wrapper, kwargs in zip(wrappers, per_wrapper_kwargs):
            if issubclass(wrapper, NoveltyWrapper):
                # NoveltyWrapper still takes novelty_episode directly
                print("DEPRECATION WARNING: NoveltyWrapper should be redesigned with novelty_episode as a wrapper arg")
                env = wrapper(env, novelty_episode=novelty_episode, **kwargs)
            else:
                env = wrapper(env, **kwargs)
        return env
    return _init
def getparser(inputs=None):
    """
    Build the experiment argument parser.

    Reminder: every option must be declared here for a config file to be
    able to override it.
    Precedence: command line > environment variables > config file values > defaults

    Parameters
    ----------
    inputs : list of str, optional
        Argument strings to parse instead of ``sys.argv`` (useful in tests).
    """
    p = configargparse.ArgParser(default_config_files=['default.ini'])
    p.add('--exp_config', required=False, is_config_file=True, help='config file path for the experiment')
    p.add('-t', '--total_timesteps', type=int, default=2500000, help='total timesteps per experiment')
    p.add('-e', '--env', type=str, default='MiniGrid-DoorKey-8x8-v0', help='Core environment')
    p.add('-s', '--saves_logs', type=str, default='novgrid_logs', help='where to save logs and models')
    p.add('--device', type=str, default='', help='device. code assumes empty means to autocheck')
    p.add('--load_model', type=str, default='', help='model to load. empty string learns from scratch')  # models/best_model.zip')
    p.add('--num_exp', type=int, default=1, help='number of learning experiments per run')
    p.add('-w', '--wandb_track', default=False, action='store_true', help='whether or not to set up as a wandb run')
    p.add('--learning_rate', type=float, default=2.5e-4, help='Learning rate for optimization')
    p.add('--num_workers', type=int, default=1, help='number of learning workers, and therefore environments')
    p.add('--seed', type=int, default=13, help='seed for randomness')
    p.add('--debug', default=False, action='store_true')
    p.add('--novelty_wrapper', type=str, default='', help='novelty to inject into environment')
    # NOTE(review): default.ini sets ``novelty-step``, which matches no option
    # declared here (the option is ``novelty_episode``) — confirm intended key.
    p.add('--novelty_episode', type=int, default=10000, help='episode in which novelty is injected')
    p.add('--eval_interval', type=int, default=1000, help='how many steps between evaluations')
    p.add('--log_interval', type=int, default=10, help='how many steps between logging')

    if inputs is None:
        parsed_args = p.parse_args()
    else:
        parsed_args = p.parse_args(inputs)
    print(parsed_args)
    return parsed_args
from setuptools import setup, find_packages

import glob

# Package metadata and install configuration for novgrid.
setup(
    name='novgrid',
    version='0.0.2',
    keywords='novelty, grid, memory, environment, agent, rl, openaigym, openai-gym, gym, gymnasium',
    url='https://github.com/eilab-gt/NovGrid',
    description='A novelty experimentation wrapper for minigrid',
    # find_packages picks up the subpackages (novgrid.envs, novgrid.env_configs);
    # the previous hard-coded ['novgrid'] list silently omitted them, producing
    # a broken install.
    packages=find_packages(include=['novgrid', 'novgrid.*']),
    install_requires=[
        'numpy>=1.15.0',
        'gymnasium',
        'minigrid',
        'stable_baselines3',
    ],
    # NOTE(review): data_files as a bare list of paths installs into the
    # default location; consider (target_dir, [files]) pairs or package_data
    # so the JSON configs land inside the installed package.
    data_files=glob.glob('novgrid/env_configs/json/*.json')
)