├── .gitignore ├── LICENSE ├── README.md ├── novgrid ├── __init__.py ├── config.py ├── env_configs │ ├── __init__.py │ ├── generator.py │ └── json │ │ ├── door_key.json │ │ ├── door_key_change.json │ │ ├── increasing_num_crossings.json │ │ ├── sample.json │ │ └── simple_to_lava_to_simple_crossing.json ├── envs │ ├── __init__.py │ ├── colored_door_key.py │ └── novgrid_objects.py ├── example.py ├── novelty_env.py └── register_envs.py ├── novgrid_old ├── __init__.py ├── baselines │ ├── __init__.py │ ├── cnn_sample.py │ ├── models │ │ └── ppo_minigrid_example_model.zip │ ├── ppo_minigrid.py │ └── render_env.py ├── env_generator.py ├── envs │ ├── __init__.py │ ├── lavagapdoorkey.py │ └── multidoormultikey.py ├── novelty_generation │ ├── __init__.py │ ├── novelty_objs.py │ └── novelty_wrappers.py └── utils │ ├── __init__.py │ ├── baseline_utils.py │ ├── default.ini │ ├── novgrid_utils.py │ └── parser.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | logs 2 | novgrid/baselines/logs/* 3 | novgrid/baselines/logs 4 | *.pyc 5 | *__pycache__ 6 | *egg-info 7 | trained_models 8 | 9 | # PyPI 10 | build/* 11 | dist/* 12 | .idea/ 13 | 14 | # wandb 15 | *wandb* 16 | *videos* 17 | *runs* 18 | 19 | # Vim 20 | *.swp 21 | 22 | # Byte-compiled / optimized / DLL files 23 | __pycache__/ 24 | *.py[cod] 25 | *$py.class 26 | 27 | # C extensions 28 | *.so 29 | 30 | # Distribution / packaging 31 | .Python 32 | build/ 33 | develop-eggs/ 34 | dist/ 35 | downloads/ 36 | eggs/ 37 | .eggs/ 38 | lib/ 39 | lib64/ 40 | parts/ 41 | sdist/ 42 | var/ 43 | wheels/ 44 | pip-wheel-metadata/ 45 | share/python-wheels/ 46 | *.egg-info/ 47 | .installed.cfg 48 | *.egg 49 | MANIFEST 50 | 51 | # PyInstaller 52 | # Usually these files are written by a python script from a template 53 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
54 | *.manifest 55 | *.spec 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .nox/ 65 | .coverage 66 | .coverage.* 67 | .cache 68 | nosetests.xml 69 | coverage.xml 70 | *.cover 71 | *.py,cover 72 | .hypothesis/ 73 | .pytest_cache/ 74 | 75 | # Translations 76 | *.mo 77 | *.pot 78 | 79 | # Django stuff: 80 | *.log 81 | local_settings.py 82 | db.sqlite3 83 | db.sqlite3-journal 84 | 85 | # Flask stuff: 86 | instance/ 87 | .webassets-cache 88 | 89 | # Scrapy stuff: 90 | .scrapy 91 | 92 | # Sphinx documentation 93 | docs/_build/ 94 | 95 | # PyBuilder 96 | target/ 97 | 98 | # Jupyter Notebook 99 | .ipynb_checkpoints 100 | 101 | # IPython 102 | profile_default/ 103 | ipython_config.py 104 | 105 | # pyenv 106 | .python-version 107 | 108 | # pipenv 109 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 110 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 111 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 112 | # install all needed dependencies. 113 | #Pipfile.lock 114 | 115 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
Novelty MiniGrid (NovGrid) is an extension of the [MiniGrid](https://github.com/Farama-Foundation/Minigrid) environment
'env_configs': 'door_key_change',
https://www.pygame.org/contribute.html 64 | step_num: 0; env_idx: [0]; rewards: [0]; dones: [False] 65 | step_num: 1; env_idx: [0]; rewards: [0]; dones: [False] 66 | step_num: 2; env_idx: [0]; rewards: [0]; dones: [False] 67 | step_num: 3; env_idx: [0]; rewards: [0]; dones: [False] 68 | step_num: 4; env_idx: [0]; rewards: [0]; dones: [False] 69 | step_num: 5; env_idx: [0]; rewards: [0]; dones: [False] 70 | step_num: 6; env_idx: [0]; rewards: [0]; dones: [False] 71 | step_num: 7; env_idx: [0]; rewards: [0]; dones: [False] 72 | step_num: 8; env_idx: [0]; rewards: [0]; dones: [False] 73 | step_num: 9; env_idx: [0]; rewards: [0]; dones: [False] 74 | step_num: 10; env_idx: [1]; rewards: [0]; dones: [ True] 75 | step_num: 11; env_idx: [1]; rewards: [0]; dones: [False] 76 | step_num: 12; env_idx: [1]; rewards: [0]; dones: [False] 77 | step_num: 13; env_idx: [1]; rewards: [0]; dones: [False] 78 | step_num: 14; env_idx: [1]; rewards: [0]; dones: [False] 79 | step_num: 15; env_idx: [1]; rewards: [0]; dones: [False] 80 | step_num: 16; env_idx: [1]; rewards: [0]; dones: [False] 81 | step_num: 17; env_idx: [1]; rewards: [0]; dones: [False] 82 | step_num: 18; env_idx: [1]; rewards: [0]; dones: [False] 83 | step_num: 19; env_idx: [1]; rewards: [0]; dones: [False] 84 | step_num: 20; env_idx: [1]; rewards: [0]; dones: [False] 85 | step_num: 21; env_idx: [2]; rewards: [0]; dones: [ True] 86 | step_num: 22; env_idx: [2]; rewards: [0]; dones: [False] 87 | step_num: 23; env_idx: [2]; rewards: [0]; dones: [False] 88 | step_num: 24; env_idx: [2]; rewards: [0]; dones: [False] 89 | step_num: 25; env_idx: [2]; rewards: [0]; dones: [False] 90 | step_num: 26; env_idx: [2]; rewards: [0]; dones: [False] 91 | step_num: 27; env_idx: [2]; rewards: [0]; dones: [False] 92 | step_num: 28; env_idx: [2]; rewards: [0]; dones: [False] 93 | step_num: 29; env_idx: [2]; rewards: [0]; dones: [False] 94 | ``` 95 | 96 | ## Novelties 97 | The following is a list and descriptions of the available novelty 
In MiniGrid the Goal object is usually at a fixed location.
110 | 111 | **ForwardMovementSpeed**: This novelty modifies the number of steps an agent takes each time the forward command is issued. In MiniGrid agents only move one gridsquare per time step. As a result, if the agent gets faster after novelty, the original policy may have a harder time controlling the agent, and will need to learn how to embrace this change that could make it reach the goal in fewer steps. 112 | 113 | **ActionRadius**: This novelty is an example of a change to the relational preconditions of an action by changing the radius around the agent where an action works. In Mini- Grid this is usually assumed to be only a distance of one or zero, depending on the object. If an agent can pick up objects after novelty without being right next to them, it will have to realize this if it is to reach the optimum solu- tion. 114 | 115 | **ColorRestriction**: This novelty restricts the objects one can interact with by color. In MiniGrid it is usually as- sumed that all objects can be interacted with. If an agent is trained with no blue interactions before novelty and then isn’t allowed to interact with yellow objects after novelty, the agent will have to learn to pay attention to the color of objects. 116 | 117 | **Burdening**: This novelty changes the effect of actions based on whether the agent has any items in the inven- tory. In MiniGrid it is usually assumed that the inventory has no effect on actions. An agent experiencing this nov- elty, for example, might move twice as fast as usual when their inventory is empty, but half as fast as usual when in possession of the item, which it will have to compensate for strategically. 
118 | -------------------------------------------------------------------------------- /novgrid/__init__.py: -------------------------------------------------------------------------------- 1 | from novgrid.novelty_env import NoveltyEnv 2 | 3 | from novgrid.register_envs import register_novgrid_envs 4 | 5 | register_novgrid_envs() 6 | -------------------------------------------------------------------------------- /novgrid/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | ENV_CONFIG_FILE = "sample" 4 | TOTAL_TIME_STEPS = None 5 | NOVELTY_STEP = 10 6 | N_ENVS = 1 7 | RENDER_DISPLAY = False 8 | STEP_DELAY = 0.0 9 | 10 | 11 | def make_parser() -> argparse.ArgumentParser: 12 | """ 13 | Creates a default parser that contains everything that a novgrid environment would need. 14 | 15 | Returns: 16 | argparse.ArgumentParser: The parser 17 | """ 18 | parser = argparse.ArgumentParser() 19 | 20 | parser.add_argument( 21 | "--env-configs-file", 22 | "-ec", 23 | type=str, 24 | default=ENV_CONFIG_FILE, 25 | help="Use the path to a json file containing the env configs here.", 26 | ) 27 | parser.add_argument( 28 | "--total-time-steps", 29 | "-t", 30 | type=int, 31 | default=TOTAL_TIME_STEPS, 32 | help="The total number of time steps to run.", 33 | ) 34 | parser.add_argument( 35 | "--novelty-step", 36 | "-n", 37 | type=int, 38 | default=NOVELTY_STEP, 39 | help="The total number of time steps to run in an environment before injecting the next novelty.", 40 | ) 41 | parser.add_argument( 42 | "--n-envs", 43 | "-e", 44 | type=int, 45 | default=N_ENVS, 46 | help="The number of envs to use when running the vectorized env.", 47 | ) 48 | parser.add_argument( 49 | "--render-display", 50 | "-rd", 51 | type=lambda s: s.lower() in {"yes", "true", "t", "y"}, 52 | default=RENDER_DISPLAY, 53 | help="Whether or not to render the display of the environment as the agent is stepping.", 54 | ) 55 | parser.add_argument( 56 | 
def get_env_configs(name: str) -> List[Dict[str, Any]]:
    """
    Load a list of environment configs from a packaged JSON file.

    Args:
        name (str): The config name, with or without the ".json" suffix,
            resolved relative to this package's "json" directory.

    Returns:
        List[Dict[str, Any]]: The parsed environment configurations.

    Raises:
        FileNotFoundError: If no config file with this name exists.
    """
    # Use endswith rather than a substring test so that names that merely
    # contain ".json" somewhere in the middle (e.g. "my.json.backup") still
    # get the proper file extension appended.
    fname = name if name.endswith(".json") else f"{name}.json"
    full_fname = os.path.join(os.path.dirname(__file__), "json", fname)
    with open(full_fname) as f:
        return json.load(f)
class IntRange(Change):
    """Change that walks an integer interval across the task sequence."""

    def __init__(self, start: int, end: int, inclusive: bool = False) -> None:
        """
        Set up the integer interval to interpolate over.

        Args:
            start (int): Start of the range.
            end (int): End of the range.
            inclusive (bool): Whether to include the end value in the range.
        """
        super().__init__()
        self.start = start
        self.end = end
        self.inclusive = inclusive

    def generate_value(self, i: int, num_tasks: int) -> int:
        """
        Interpolate task index ``i`` onto the interval using integer division.

        Args:
            i (int): Current iteration.
            num_tasks (int): Total number of tasks.

        Returns:
            int: Generated integer value.
        """
        # Widen the span by one when the end point itself should be reachable.
        span = self.end + int(self.inclusive) - self.start
        return self.start + (i * span) // num_tasks
class Toggle(Change):
    """Change that alternates between two values from task to task."""

    def __init__(self, val1: Any = False, val2: Any = True) -> None:
        """
        Store the pair of values to alternate between.

        Args:
            val1 (Any): First value.
            val2 (Any): Second value.
        """
        super().__init__()
        self.val1 = val1
        self.val2 = val2

    def generate_value(self, i: int, num_tasks: int) -> Any:
        """
        Pick one of the two values based on the parity of the iteration index.

        Args:
            i (int): Current iteration.
            num_tasks (int): Total number of tasks.

        Returns:
            Any: ``val1`` on even ``i``, ``val2`` on odd ``i``.
        """
        if i % 2:
            return self.val2
        return self.val1
class EnvConfigGenerator:
    """
    Class for generating environment configurations based on specified changes.
    """

    def __init__(
        self, env_id: str, num_tasks: int, changes: Dict[str, "Change"]
    ) -> None:
        """
        Initializes the EnvConfigGenerator with the base environment ID, number of tasks, and changes.

        Args:
            env_id (str): Base environment ID.
            num_tasks (int): Number of tasks to generate.
            changes (Dict[str, Change]): Dictionary of changes to be applied.
        """
        self.base_env_id = env_id
        self.num_tasks = num_tasks
        self.changes = changes

    def generate_env_configs(self) -> List[Dict[str, Any]]:
        """
        Generates environment configurations based on the specified changes.

        Each config carries the base env_id plus one generated value per
        registered change for that task index.

        Returns:
            List[Dict[str, Any]]: List of environment configurations.
        """
        return [
            {
                "env_id": self.base_env_id,
                **{
                    k: v.generate_value(i, self.num_tasks)
                    for k, v in self.changes.items()
                },
            }
            for i in range(self.num_tasks)
        ]

    def save_env_configs(self, json_file_name: str) -> List[Dict[str, Any]]:
        """
        Generates and saves environment configurations to a JSON file.

        Args:
            json_file_name (str): Name of the JSON file.

        Returns:
            List[Dict[str, Any]]: List of environment configurations.
        """
        env_configs = self.generate_env_configs()
        with open(json_file_name, "w") as f:
            json.dump(env_configs, f, indent=2)
        return env_configs

    def global_save_env_configs(
        self, name: str, override_existing_file: bool = False
    ) -> List[Dict[str, Any]]:
        """
        Generates and globally saves environment configurations.

        Args:
            name (str): Name of the environment configuration.
            override_existing_file (bool): Whether to override an existing configuration file.

        Returns:
            List[Dict[str, Any]]: List of environment configurations.

        Raises:
            ValueError: If a global config with this name already exists and
                override_existing_file is False.
        """
        # Use endswith rather than a substring test so names merely containing
        # ".json" are not mistaken for full file names.
        fname = name if name.endswith(".json") else f"{name}.json"
        full_fname = os.path.join(os.path.dirname(__file__), "json", fname)
        if os.path.exists(full_fname) and not override_existing_file:
            raise ValueError(
                f"The name {name} already has a global file for its env config. To override this file use the override_existing_file flag."
            )
        # Bug fix: the configs were previously saved but never returned, even
        # though the docstring documents a return value.
        return self.save_env_configs(full_fname)
290 | """ 291 | expected_result = [ 292 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 1}, 293 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 2}, 294 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 3}, 295 | ] 296 | 297 | result = EnvConfigGenerator( 298 | env_id="MiniGrid-SimpleCrossingS9N0-v0", 299 | num_tasks=3, 300 | changes={"num_crossings": IntRange(1, 4)}, 301 | ).generate_env_configs() 302 | 303 | assert expected_result == result 304 | 305 | 306 | def test_generator_list_change(): 307 | """ 308 | Test case for EnvConfigGenerator with a list change. 309 | """ 310 | expected_result = [ 311 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 1}, 312 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 2}, 313 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 3}, 314 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 2}, 315 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 1}, 316 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 2}, 317 | ] 318 | 319 | result = EnvConfigGenerator( 320 | env_id="MiniGrid-SimpleCrossingS9N0-v0", 321 | num_tasks=6, 322 | changes={"num_crossings": ListChange([1, 2, 3], use_snake_boundary=True)}, 323 | ).generate_env_configs() 324 | 325 | assert expected_result == result 326 | 327 | 328 | def test_generator_float_range(): 329 | """ 330 | Test case for EnvConfigGenerator with a float range change. 
331 | """ 332 | expected_result = [ 333 | {"env_id": "CartPole", "pole_weight": 5.0}, 334 | {"env_id": "CartPole", "pole_weight": 6.5}, 335 | {"env_id": "CartPole", "pole_weight": 8.0}, 336 | {"env_id": "CartPole", "pole_weight": 9.5}, 337 | ] 338 | 339 | result = EnvConfigGenerator( 340 | env_id="CartPole", 341 | num_tasks=4, 342 | changes={"pole_weight": FloatRange(5.0, 9.5, inclusive=True)}, 343 | ).generate_env_configs() 344 | 345 | assert expected_result == result 346 | 347 | 348 | def test_generator_multi_change(): 349 | """ 350 | Test case for EnvConfigGenerator with multiple changes. 351 | """ 352 | expected_result = [ 353 | { 354 | "env_id": "MiniGrid-SimpleCrossingS9N0-v0", 355 | "num_crossings": 1, 356 | "test_constant": 5, 357 | }, 358 | { 359 | "env_id": "MiniGrid-SimpleCrossingS9N0-v0", 360 | "num_crossings": 2, 361 | "test_constant": 5, 362 | }, 363 | { 364 | "env_id": "MiniGrid-SimpleCrossingS9N0-v0", 365 | "num_crossings": 3, 366 | "test_constant": 5, 367 | }, 368 | ] 369 | 370 | result = EnvConfigGenerator( 371 | env_id="MiniGrid-SimpleCrossingS9N0-v0", 372 | num_tasks=3, 373 | changes={"num_crossings": IntRange(1, 4), "test_constant": Constant(5)}, 374 | ).generate_env_configs() 375 | 376 | assert expected_result == result 377 | -------------------------------------------------------------------------------- /novgrid/env_configs/json/door_key.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "env_id": "NovGrid-ColoredDoorKeyEnv" 4 | } 5 | ] 6 | -------------------------------------------------------------------------------- /novgrid/env_configs/json/door_key_change.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "env_id": "NovGrid-ColoredDoorKeyEnv", 4 | "door_color": "red", 5 | "correct_key_color": "red", 6 | "key_colors": ["red", "blue"] 7 | }, 8 | { 9 | "env_id": "NovGrid-ColoredDoorKeyEnv", 10 | "door_color": "red", 11 | 
"correct_key_color": "blue", 12 | "key_colors": ["red", "blue"] 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /novgrid/env_configs/json/increasing_num_crossings.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 1 }, 3 | { "env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 2 }, 4 | { "env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 3 } 5 | ] 6 | -------------------------------------------------------------------------------- /novgrid/env_configs/json/sample.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "env_id": "MiniGrid-Empty-16x16-v0", 4 | "size": 10 5 | }, 6 | { 7 | "env_id": "MiniGrid-Empty-16x16-v0", 8 | "size": 8 9 | }, 10 | { 11 | "env_id": "MiniGrid-Empty-16x16-v0" 12 | } 13 | ] 14 | -------------------------------------------------------------------------------- /novgrid/env_configs/json/simple_to_lava_to_simple_crossing.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "env_id": "MiniGrid-SimpleCrossingS9N1-v0", 4 | "obstacle_type": "gridobj:Wall" 5 | }, 6 | { 7 | "env_id": "MiniGrid-SimpleCrossingS9N1-v0", 8 | "obstacle_type": "gridobj:Lava" 9 | }, 10 | { 11 | "env_id": "MiniGrid-SimpleCrossingS9N1-v0", 12 | "obstacle_type": "gridobj:Wall" 13 | } 14 | ] -------------------------------------------------------------------------------- /novgrid/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from novgrid.envs.colored_door_key import ColoredDoorKeyEnv 2 | -------------------------------------------------------------------------------- /novgrid/envs/colored_door_key.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Any, Dict, List, SupportsFloat 2 
| 3 | from minigrid.core.grid import Grid 4 | from minigrid.core.world_object import Door, Goal, Key 5 | from minigrid.core.mission import MissionSpace 6 | from minigrid.minigrid_env import MiniGridEnv 7 | 8 | from novgrid.envs.novgrid_objects import ColorDoor 9 | 10 | 11 | class ColoredDoorKeyEnv(MiniGridEnv): 12 | 13 | def __init__( 14 | self, 15 | door_color: str = "yellow", 16 | key_colors: Optional[List[str]] = None, 17 | correct_key_color: str = "yellow", 18 | size: int = 8, 19 | max_steps: Optional[int] = None, 20 | **kwargs: Dict[str, Any] 21 | ): 22 | self.door_color = door_color 23 | self.key_colors = key_colors if key_colors is not None else [correct_key_color] 24 | self.correct_key_color = correct_key_color 25 | if max_steps is None: 26 | max_steps = 10 * size**2 27 | mission_space = MissionSpace(mission_func=self._gen_mission) 28 | super().__init__( 29 | mission_space=mission_space, grid_size=size, max_steps=max_steps, **kwargs 30 | ) 31 | 32 | @staticmethod 33 | def _gen_mission(): 34 | return "use the correct key to open the door and get to the goal" 35 | 36 | def step(self, action): 37 | return super().step(action) 38 | 39 | def _gen_grid(self, width: int, height: int): 40 | # Create an empty grid 41 | self.grid = Grid(width=width, height=height) 42 | 43 | # Generate the surrounding walls 44 | self.grid.wall_rect(0, 0, width, height) 45 | 46 | # Place a goal in the bottom right corner 47 | self.put_obj(Goal(), width - 2, height - 2) 48 | 49 | # Create a vertical splitting wall 50 | splitIdx = self._rand_int(2, width - 2) 51 | self.grid.vert_wall(splitIdx, 0) 52 | 53 | # Place the agent at a random position and orientation on the left side 54 | self.place_agent(size=(splitIdx, height)) 55 | 56 | # Place a door in the wall 57 | doorIdx = self._rand_int(1, width - 2) 58 | self.put_obj( 59 | ColorDoor( 60 | self.door_color, is_locked=True, key_color=self.correct_key_color 61 | ), 62 | splitIdx, 63 | doorIdx, 64 | ) 65 | 66 | # Place a yellow key on the 
left side 67 | for color in self.key_colors: 68 | self.place_obj(obj=Key(color=color), top=(0, 0), size=(splitIdx, height)) 69 | 70 | self.mission = self._gen_mission() 71 | -------------------------------------------------------------------------------- /novgrid/envs/novgrid_objects.py: -------------------------------------------------------------------------------- 1 | from minigrid.core.world_object import * 2 | 3 | class ColorDoor(Door): 4 | """ 5 | A Door instance where the key color can be specified and doesn't have to match the door 6 | """ 7 | def __init__(self, color, is_open=False, is_locked=False, key_color=None): 8 | super().__init__(color, is_open, is_locked) 9 | self.is_open = is_open 10 | self.is_locked = is_locked 11 | if key_color: 12 | self.key_color = key_color 13 | else: 14 | self.key_color = color 15 | 16 | def toggle(self, env, pos): 17 | # If the player has the right key to open the door 18 | if self.is_locked: 19 | if isinstance(env.carrying, Key) and env.carrying.color == self.key_color: 20 | self.is_locked = False 21 | self.is_open = True 22 | return True 23 | return False 24 | 25 | self.is_open = not self.is_open 26 | return True -------------------------------------------------------------------------------- /novgrid/example.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | from novgrid import NoveltyEnv 5 | from novgrid.config import make_parser 6 | 7 | 8 | def run_example( 9 | args: argparse.Namespace, 10 | ) -> None: 11 | """Run an example run with random actions to test a given configuration 12 | 13 | Args: 14 | args (argparse.Namespace): The args from the default parser 15 | """ 16 | env = NoveltyEnv( 17 | env_configs=args.env_configs_file, 18 | novelty_step=args.novelty_step, 19 | n_envs=args.n_envs, 20 | render_mode="human" if args.render_display else None, 21 | ) 22 | 23 | env.reset() 24 | 25 | if args.total_time_steps is None: 26 | total_time_steps = 
(env.num_transfers + 1) * args.novelty_step 27 | else: 28 | total_time_steps = args.total_time_steps 29 | 30 | for step_num in range(0, total_time_steps, args.n_envs): 31 | observations, rewards, dones, infos = env.step( 32 | [env.action_space.sample() for _ in range(args.n_envs)] 33 | ) 34 | if args.render_display: 35 | env.render("human") 36 | print( 37 | f"step_num: {step_num}; env_idx: {env.get_attr('env_idx')}; rewards: {rewards}; dones: {dones}" 38 | ) 39 | 40 | if args.step_delay > 0: 41 | time.sleep(args.step_delay) 42 | 43 | 44 | if __name__ == "__main__": 45 | parser = make_parser() 46 | args = parser.parse_args() 47 | 48 | run_example(args=args) 49 | -------------------------------------------------------------------------------- /novgrid/novelty_env.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Optional, SupportsFloat, Tuple, Dict, Union 2 | 3 | import os 4 | 5 | import gymnasium as gym 6 | from gymnasium.envs.registration import EnvSpec 7 | import json 8 | import numpy as np 9 | import inspect 10 | 11 | from stable_baselines3.common.monitor import Monitor 12 | from stable_baselines3.common.vec_env import SubprocVecEnv 13 | from stable_baselines3.common.vec_env.base_vec_env import VecEnvStepReturn 14 | 15 | from novgrid.env_configs import get_env_configs 16 | import novgrid.envs.novgrid_objects as novgrid_objects 17 | 18 | 19 | class ListEnv(gym.Env): 20 | """ 21 | A vectorized environment that chains multiple environments together. 22 | 23 | Attributes: 24 | env_lst (List[gymnasium.Env]): List of environments to chain. 25 | env_idx (int): Index of the current environment. 26 | """ 27 | 28 | def __init__(self, env_lst: List[gym.Env]) -> None: 29 | """ 30 | Initializes the ListEnv with a list of environments. 31 | 32 | Args: 33 | env_lst (List[gymnasium.Env]): List of environments to chain. 
34 | """ 35 | self.env_lst = env_lst 36 | self.env_idx = 0 37 | 38 | def incr_env_idx(self) -> bool: 39 | """ 40 | Increments the environment index, closing the current environment and resetting to the next one. 41 | 42 | Returns: 43 | bool: True if the environment index was successfully incremented, False otherwise. 44 | """ 45 | if self.env_idx >= len(self.env_lst) - 1: 46 | return False 47 | self.cur_env.close() 48 | self.env_idx += 1 49 | self.cur_env.reset() 50 | return True 51 | 52 | def step( 53 | self, action: Any 54 | ) -> Tuple[Any, SupportsFloat, bool, bool, Dict[str, Any]]: 55 | """ 56 | Takes a step in the current environment. 57 | 58 | Args: 59 | action (Any): Action to take. 60 | 61 | Returns: 62 | Tuple[Any, SupportsFloat, bool, bool, Dict[str, Any]]: Step information. 63 | """ 64 | return self.cur_env.step(action=action) 65 | 66 | def reset( 67 | self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None 68 | ) -> Tuple[Any, Dict[str, Any]]: 69 | """ 70 | Resets the current environment. 71 | 72 | Args: 73 | seed (Optional[int]): Seed for environment reset. 74 | options (Optional[Dict[str, Any]]): Additional options for reset. 75 | 76 | Returns: 77 | Tuple[Any, Dict[str, Any]]: Reset information. 78 | """ 79 | return self.cur_env.reset(seed=seed, options=options) 80 | 81 | def render(self) -> Union[gym.core.RenderFrame, List[gym.core.RenderFrame], None]: 82 | """ 83 | Renders the current environment. 84 | 85 | Returns: 86 | Union[gymnasium.core.RenderFrame, List[gymnasium.core.RenderFrame], None]: Rendered frame(s). 87 | """ 88 | return self.cur_env.render() 89 | 90 | def close(self) -> None: 91 | """Closes all environments in the list.""" 92 | for env in self.env_lst: 93 | env.close() 94 | 95 | @property 96 | def cur_env(self) -> gym.Env: 97 | """ 98 | Gets the current environment. 99 | 100 | Returns: 101 | gymnasium.Env: Current environment. 
102 | """ 103 | return self.env_lst[self.env_idx] 104 | 105 | @property 106 | def unwrapped(self) -> gym.Env: 107 | """ 108 | Gets the unwrapped version of the current environment. 109 | 110 | Returns: 111 | gymnasium.Env: Unwrapped current environment. 112 | """ 113 | return self.cur_env 114 | 115 | @property 116 | def action_space(self) -> gym.Space: 117 | """ 118 | Gets the action space of the current environment. 119 | 120 | Returns: 121 | gymnasium.Space: Action space. 122 | """ 123 | return self.cur_env.action_space 124 | 125 | @property 126 | def observation_space(self) -> gym.Space: 127 | """ 128 | Gets the observation space of the current environment. 129 | 130 | Returns: 131 | gymnasium.Space: Observation space. 132 | """ 133 | return self.cur_env.observation_space 134 | 135 | @property 136 | def reward_range(self) -> Tuple[SupportsFloat, SupportsFloat]: 137 | """ 138 | Gets the reward range of the current environment. 139 | 140 | Returns: 141 | Tuple[SupportsFloat, SupportsFloat]: Reward range. 142 | """ 143 | return self.cur_env.reward_range 144 | 145 | @property 146 | def spec(self) -> EnvSpec: 147 | """ 148 | Gets the spec of the current environment. 149 | 150 | Returns: 151 | gymnasium.EnvSpec: Environment specification. 152 | """ 153 | return self.cur_env.spec 154 | 155 | @property 156 | def np_random(self) -> np.random.RandomState: 157 | """ 158 | Gets the random number generator of the current environment. 159 | 160 | Returns: 161 | np.random.RandomState: Random number generator. 162 | """ 163 | return self.cur_env.np_random 164 | 165 | @property 166 | def render_mode(self) -> Optional[str]: 167 | """ 168 | Gets the render mode of the current environment. 169 | 170 | Returns: 171 | Optional[str]: Render mode. 172 | """ 173 | return self.cur_env.render_mode 174 | 175 | 176 | class NoveltyEnv(SubprocVecEnv): 177 | """ 178 | A vectorized environment with novelty injection based on specified intervals. 
179 | 180 | Attributes: 181 | novelty_step (int): Number of time steps between novelty injections. 182 | n_envs (int): Number of environments to run in parallel. 183 | print_novelty_box (bool): Whether to print a novelty injection box. 184 | num_transfers (int): Number of transfers between environments. 185 | total_time_steps (int): Total time steps taken. 186 | last_incr (int): Time step of the last environment index increment. 187 | start_index (int): Starting index for environment creation. 188 | monitor_dir (Optional[str]): Directory for monitoring results. 189 | """ 190 | 191 | def __init__( 192 | self, 193 | env_configs: Union[str, List[Dict[str, Any]]], 194 | novelty_step: int, 195 | wrappers: List[gym.Wrapper] = [], 196 | wrapper_kwargs_lst: List[Dict[str, Any]] = [], 197 | n_envs: int = 1, 198 | seed: Optional[int] = None, 199 | start_index: int = 0, 200 | monitor_dir: Optional[str] = None, 201 | monitor_kwargs: Optional[str] = None, 202 | start_method: Optional[str] = None, 203 | print_novelty_box: bool = False, 204 | render_mode: Optional[str] = None, 205 | ): 206 | """ 207 | Initializes the NoveltyEnv with the provided configurations. 208 | 209 | Args: 210 | env_configs (Union[str, List[Dict[str, Any]]]): Configuration for environments. 211 | novelty_step (int): Number of time steps between novelty injections. 212 | wrappers (List[gymnasium.Wrapper]): List of wrappers to apply to each environment. 213 | wrapper_kwargs_lst (List[Dict[str, Any]]): List of wrapper kwargs for each wrapper. 214 | n_envs (int): Number of environments to run in parallel. 215 | seed (Optional[int]): Random seed. 216 | start_index (int): Starting index for environment creation. 217 | monitor_dir (Optional[str]): Directory for monitoring results. 218 | monitor_kwargs (Optional[str]): Additional kwargs for monitoring. 219 | start_method (Optional[str]): Start method for parallel environments. 220 | print_novelty_box (bool): Whether to print a novelty injection box. 
221 | render_mode (Optional[str]): Render mode for environments. 222 | """ 223 | if type(env_configs) == str: 224 | if os.path.exists(env_configs): 225 | with open(env_configs, "r") as f: 226 | env_configs = json.load(f) 227 | else: 228 | env_configs = get_env_configs(env_configs) 229 | 230 | world_objects = { 231 | k.lower(): v 232 | for k, v in inspect.getmembers( 233 | novgrid_objects, 234 | lambda obj: inspect.isclass(obj) 235 | and issubclass(obj, novgrid_objects.WorldObj), 236 | ) 237 | } 238 | 239 | for cfg in env_configs: 240 | for k, v in cfg.items(): 241 | if ( 242 | type(v) == str 243 | and v.startswith("gridobj:") 244 | and v.split(":")[-1].lower() in world_objects 245 | ): 246 | cfg[k] = world_objects[v.split(":")[-1].lower()] 247 | 248 | self.novelty_step = novelty_step 249 | self.n_envs = n_envs 250 | self.n_tasks = len(env_configs) 251 | self.print_novelty_box = print_novelty_box 252 | self.num_transfers = len(env_configs) - 1 253 | 254 | self.total_time_steps = 0 255 | self.last_incr = 0 256 | 257 | self.start_index = start_index 258 | self.monitor_dir = monitor_dir 259 | monitor_kwargs = {} if monitor_kwargs is None else monitor_kwargs 260 | 261 | def make_env_fn(rank): 262 | def _make_env(config): 263 | env_id = config["env_id"] 264 | env_kwargs = {k: v for k, v in config.items() if k != "env_id"} 265 | 266 | # Initialize the environment 267 | if isinstance(env_id, str): 268 | env = gym.make(env_id, render_mode=render_mode, **env_kwargs) 269 | else: 270 | env = env_id(**env_kwargs, render_mode=render_mode) 271 | 272 | # Optionally use the random seed provided 273 | if seed is not None: 274 | env.seed(seed + rank) 275 | env.action_space.seed(seed + rank) 276 | 277 | # Wrap the env in a Monitor wrapper 278 | # to have additional training information 279 | monitor_path = ( 280 | os.path.join(monitor_dir, str(rank)) 281 | if monitor_dir is not None 282 | else None 283 | ) 284 | # Create the monitor folder if needed 285 | if monitor_path is not None: 
286 | os.makedirs(monitor_path, exist_ok=True) 287 | env = Monitor(env, filename=monitor_path, **monitor_kwargs) 288 | 289 | # Wrap the environment with the provided wrappers 290 | for wrapper_cls, wrapper_kwargs in zip( 291 | wrappers, 292 | wrapper_kwargs_lst 293 | + [{}] * max(0, len(wrappers) - len(wrapper_kwargs_lst)), 294 | ): 295 | env = wrapper_cls(env, **wrapper_kwargs) 296 | 297 | return env 298 | 299 | def _init(): 300 | # Returns a list env with each env constructed from the config in env_configs 301 | return ListEnv([_make_env(config) for config in env_configs]) 302 | 303 | return _init 304 | 305 | env_fns = [make_env_fn(rank=i + start_index) for i in range(n_envs)] 306 | 307 | if start_method is None: 308 | import multiprocessing as mp 309 | start_method = "fork" if "fork" in mp.get_all_start_methods() else None 310 | 311 | super().__init__(env_fns=env_fns, start_method=start_method) 312 | 313 | def step(self, actions: np.ndarray) -> VecEnvStepReturn: 314 | """ 315 | Takes a step in the parallel environments. 316 | 317 | Args: 318 | actions (np.ndarray): Actions for each environment. 
319 | 320 | Returns: 321 | VecEnvStepReturn: The observations, rewards, dones, and infos from each environment 322 | """ 323 | observations, rewards, dones, infos = super().step(actions) 324 | # Increment total time steps 325 | self.total_time_steps += self.n_envs 326 | if self.total_time_steps - self.last_incr > self.novelty_step: 327 | self.last_incr = self.total_time_steps 328 | # Trigger the novelty if enough steps have passed 329 | novelty_injected = self.env_method("incr_env_idx") 330 | dones[:] = True 331 | 332 | if np.any(novelty_injected) and self.print_novelty_box: 333 | s = f"| Novelty Injected (on env {self.get_attr('env_idx')}) |" 334 | print("-" * len(s)) 335 | print(s) 336 | print("-" * len(s)) 337 | 338 | return observations, rewards, dones, infos 339 | -------------------------------------------------------------------------------- /novgrid/register_envs.py: -------------------------------------------------------------------------------- 1 | from gymnasium import Env 2 | from gymnasium.envs.registration import register 3 | import novgrid.envs as envs 4 | import inspect 5 | 6 | 7 | def register_novgrid_envs() -> None: 8 | """ 9 | Registers all the novgrid environments with gymnasium 10 | """ 11 | [ 12 | register(id=f"NovGrid-{name}", entry_point=f"novgrid.envs:{name}") 13 | for name, _ in inspect.getmembers( 14 | envs, lambda obj: inspect.isclass(obj) and issubclass(obj, Env) 15 | ) 16 | ] 17 | -------------------------------------------------------------------------------- /novgrid_old/__init__.py: -------------------------------------------------------------------------------- 1 | # Import the envs module so that envs register themselves 2 | import novgrid.envs 3 | 4 | # Import wrappers so it's accessible when installing with pip 5 | import novgrid.novelty_generation 6 | import novgrid 7 | -------------------------------------------------------------------------------- /novgrid_old/baselines/__init__.py: 
-------------------------------------------------------------------------------- 1 | import minigrid_novelty_generator -------------------------------------------------------------------------------- /novgrid_old/baselines/cnn_sample.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | import gym_minigrid # MUST BE IMPORTED TO SEE ENVIRONMENTS 4 | from gym_minigrid.wrappers import ImgObsWrapper 5 | import torch as th 6 | import wandb 7 | from wandb.integration.sb3 import WandbCallback 8 | 9 | from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor 10 | from stable_baselines3.common.vec_env.vec_transpose import VecTransposeImage 11 | from stable_baselines3 import PPO 12 | from stable_baselines3.common.callbacks import EvalCallback, CallbackList 13 | from stable_baselines3.common.env_util import make_vec_env 14 | 15 | from novgrid.utils.parser import getparser 16 | from novgrid.utils.novgrid_utils import make_env 17 | from novgrid.utils.baseline_utils import MinigridCNN 18 | from novgrid.novelty_generation.novelty_wrappers import * 19 | 20 | 21 | def main(args): 22 | if args.device: 23 | device = th.device(args.device) 24 | else: 25 | device = th.device('cuda' if th.cuda.is_available() else 'cpu') 26 | # Set up tracking and logging 27 | now = datetime.now() 28 | dt_string = now.strftime("%Y-%m-%d_%H-%M-%S") 29 | if args.saves_logs == 'logs': 30 | defaults = getparser([]) 31 | if defaults != args: 32 | logstr = '' 33 | for key, value in args.__dict__.items(): 34 | if defaults.__dict__[key] != value: 35 | elemstr = str(key) + '=' + str(value) + '_' 36 | logstr += elemstr 37 | else: 38 | logstr = args.saves_logs 39 | 40 | 41 | log_dir = os.path.abspath('./logs/' + logstr + '_' + dt_string) 42 | os.makedirs(log_dir) 43 | 44 | if args.wandb_track: 45 | wandb_config = { 46 | 'total_timesteps': args.total_timesteps, 47 | 'env_name': args.env, 48 | 'novelty_wrapper': 
args.novelty_wrapper, 49 | 'novelty_episode': args.novelty_episode, 50 | 'args': args 51 | } 52 | wandb.tensorboard.patch(root_logdir=log_dir, pytorch=True) 53 | wandb_run = wandb.init( 54 | project='novgrid_baselines', 55 | entity="balloch", 56 | settings=wandb.Settings(start_method="fork"), 57 | name=logstr + '_' + dt_string, 58 | dir='./logs/', 59 | config=wandb_config, 60 | sync_tensorboard=True, 61 | monitor_gym=True 62 | ) 63 | 64 | 65 | env_wrappers = [ImgObsWrapper] 66 | wrappers_args = [{}] 67 | 68 | n_envs = args.num_workers 69 | 70 | if args.novelty_wrapper: 71 | novelty_wrapper = eval(args.novelty_wrapper) 72 | env_wrappers = [novelty_wrapper] + env_wrappers 73 | wrappers_args.append({}) 74 | env_list = [make_env(env_name=args.env, 75 | wrappers=env_wrappers, 76 | wrapper_args=wrappers_args, 77 | novelty_episode=args.novelty_episode) for _ in range(n_envs)] 78 | env = VecMonitor(DummyVecEnv(env_list)) 79 | elif n_envs > 1: 80 | print('try make_vec_env') 81 | # This only works with a single wrapper for some reason. 
82 | env = make_vec_env(args.env, 83 | n_envs=n_envs, 84 | seed=0, 85 | wrapper_class=env_wrappers[0]) 86 | else: 87 | env_list = [make_env(env_name=args.env, 88 | wrappers=env_wrappers, 89 | novelty_episode=args.novelty_episode) for _ in range(args.num_workers)] 90 | env = VecMonitor(DummyVecEnv(env_list)) 91 | 92 | # Set up and create model 93 | policy_kwargs = dict( 94 | features_extractor_class=MinigridCNN, 95 | features_extractor_kwargs=dict(features_dim=128), ) 96 | model = PPO("CnnPolicy", 97 | env, 98 | policy_kwargs=policy_kwargs, 99 | learning_rate=args.learning_rate, 100 | verbose=1, 101 | tensorboard_log=log_dir, 102 | device=device) 103 | if args.load_model: 104 | print(f'loading model {args.load_model}') 105 | model.set_parameters(args.load_model) 106 | 107 | # Set up experiment callbacks 108 | eval_callback = EvalCallback( 109 | VecTransposeImage(env), 110 | best_model_save_path=log_dir, 111 | log_path=log_dir, 112 | eval_freq=round(args.eval_interval/n_envs), 113 | deterministic=True, 114 | render=False) 115 | callback_list = [eval_callback] 116 | 117 | if args.wandb_track: 118 | tracking_callback = WandbCallback( 119 | gradient_save_freq=10, 120 | model_save_path=wandb_run.dir, #'/datadrive/wandb_tmp/', 121 | model_save_freq=10000, 122 | verbose=2) 123 | callback_list.append(tracking_callback) 124 | # wandb.watch(sb_policy) 125 | 126 | all_callback = CallbackList(callback_list) 127 | 128 | # Run Experiments! 
129 | for exp in range(args.num_exp): 130 | model.learn( 131 | total_timesteps=args.total_timesteps, 132 | log_interval=args.log_interval, 133 | tb_log_name='run_{}'.format(exp), 134 | callback=all_callback, 135 | ) 136 | model.save(log_dir + '/' + 'run_{}'.format(exp) + '_final_model') 137 | 138 | 139 | if __name__ == "__main__": 140 | config_args = getparser() 141 | main(config_args) 142 | -------------------------------------------------------------------------------- /novgrid_old/baselines/models/ppo_minigrid_example_model.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eilab-gt/NovGrid/5873d2148b246ba9433307e8791ab794c0d7ca57/novgrid_old/baselines/models/ppo_minigrid_example_model.zip -------------------------------------------------------------------------------- /novgrid_old/baselines/ppo_minigrid.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | 4 | import gym_minigrid # MUST BE IMPORTED TO SEE ENVIRONMENTS 5 | from gym_minigrid.wrappers import FlatObsWrapper 6 | import torch as th 7 | from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor 8 | from stable_baselines3 import PPO 9 | 10 | from novgrid.utils.parser import getparser 11 | from novgrid.utils.novgrid_utils import make_env 12 | from novgrid.novelty_generation.novelty_wrappers import * 13 | 14 | device = th.device('cuda' if th.cuda.is_available() else 'cpu') 15 | 16 | 17 | def main(args): 18 | # Set up tracking 19 | now = datetime.now() 20 | dt_string = now.strftime("%d-%m-%Y_%H-%M-%S") 21 | log_dir = os.path.abspath('./logs/' + args.saves_logs + '_' + dt_string) 22 | os.makedirs(log_dir) 23 | 24 | # Create environments 25 | novelty_wrapper = eval(args.novelty_wrapper) 26 | env_wrappers = [novelty_wrapper, FlatObsWrapper] 27 | env_list = [make_env(args.env, log_dir, env_wrappers, args.novelty_episode) for _ in 
range(args.num_workers)] 28 | env = VecMonitor(DummyVecEnv(env_list)) 29 | 30 | # Set up and create model 31 | model = PPO("MlpPolicy", 32 | env, 33 | learning_rate=args.learning_rate, 34 | verbose=1, 35 | tensorboard_log=log_dir, 36 | device=device) 37 | if args.load_model: 38 | print(f'loading model {args.load_model}') 39 | model.set_parameters(args.load_model) 40 | 41 | for exp in range(args.num_exp): 42 | model.learn( 43 | total_timesteps=args.total_timesteps, 44 | tb_log_name='run_{}'.format(exp) 45 | ) 46 | model.save(log_dir + '/' + 'run_{}'.format(exp) + '_final_model') 47 | 48 | 49 | if __name__ == "__main__": 50 | config_args = getparser() 51 | main(config_args) 52 | -------------------------------------------------------------------------------- /novgrid_old/baselines/render_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | import novgrid 5 | import gym_minigrid 6 | import gym 7 | from PIL import Image 8 | from gym_minigrid.wrappers import * 9 | 10 | 11 | env_name = 'MiniGrid-LavaShortcutMaze8x8-v0' 12 | # env = RGBImgPartialObsWrapper(env) 13 | # env = ImgObsWrapper(env) 14 | 15 | 16 | env = gym.make(env_name) 17 | env.reset() 18 | outs = env.step(1) 19 | outs2 = env.step(1) 20 | outs3 = env.step(2) 21 | 22 | 23 | # Simple rendering 24 | img = Image.fromarray(env.render('rgb_array'),'RGB') 25 | img.show() 26 | 27 | # ## Video rendering with timing 28 | # t0 = time.time() 29 | # num_frames=5000 30 | # images = [] 31 | # for i in range(num_frames): 32 | # img = Image.fromarray(env.render('rgb_array'),'RGB') 33 | # images.append(img) 34 | # # img.show() 35 | # obs, reward, done, info = env.step(0) 36 | # images[0].save(env_name+'out.gif', 37 | # save_all=True, 38 | # append_images=images[1:], 39 | # optimize=False, 40 | # duration=40, 41 | # loop=0) 42 | # t1 = time.time() 43 | # dt = t1 - t0 44 | # frames_per_sec = num_frames / dt 45 | # 46 | # print('Rendering FPS : 
{:.0f}'.format(frames_per_sec))
47 | 
-------------------------------------------------------------------------------- /novgrid_old/env_generator.py: --------------------------------------------------------------------------------
1 | from typing import Optional, List, Union, Any, Tuple
2 | 
3 | import numpy as np
4 | 
5 | def generate_config_json(
6 |     base_env: str,
7 |     num_tasks: int = 0,
8 |     change_vars: Optional[List[str]] = None,
9 |     change_types: Optional[List[type]] = None,
10 |     change_ranges: Optional[List[Union[Tuple[Any, Any], None]]] = None,
11 | ):
12 |     """
13 |     base_env : str, the name of the env_id
14 |     num_tasks : the number of changes (so the number of resulting environments is num_tasks+1),
15 |     change_vars : the list of kwarg variable names to change,
16 |     change_types : the list of types of the kwarg variables to change, options 'bool', 'int', or 'float'
17 |     change_ranges : the ranges of the kwarg variables to change if type is not bool.
18 |         Each must be len == 2 or None for bool.
19 |         For an int will return the largest subinterval divisible by num_tasks.
20 | """ 21 | if change_ranges is not None: 22 | for r in change_ranges: 23 | assert r is None or len(r) == 2 24 | 25 | json_data = [] 26 | # for n in range(num_tasks): 27 | var_values = {} 28 | for idx, var in enumerate(change_vars): 29 | if change_types[idx] is bool: 30 | var_values[var] = [bool(i % 2) for i in range(num_tasks)] 31 | elif change_types[idx] is int: 32 | var_values[var] = [ 33 | val * (change_ranges[idx][1] - change_ranges[idx][0]) // num_tasks 34 | + change_ranges[idx][0] 35 | for val in range(num_tasks) 36 | ] 37 | elif change_types[idx] is float: 38 | var_values[var] = list(np.linspace(*change_ranges[idx]), num_tasks) 39 | else: 40 | raise TypeError 41 | for i in range(num_tasks): 42 | json_data.append( 43 | { 44 | "env_id": base_env, 45 | **dict(map(lambda x: (x[0], x[1][i]), var_values.items())), 46 | } 47 | ) 48 | return json_data 49 | 50 | 51 | def assert_value(ground_truth, value): 52 | try: 53 | assert ground_truth == value 54 | except: 55 | print("Test Failed!") 56 | print("Expected:", ground_truth) 57 | print("Received:", value) 58 | 59 | 60 | def test1(): 61 | case1 = [ 62 | {"env_id": "LavaGrid", "lava_on": False}, 63 | {"env_id": "LavaGrid", "lava_on": True}, 64 | {"env_id": "LavaGrid", "lava_on": False}, 65 | ] 66 | 67 | case2 = [ 68 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 1}, 69 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 2}, 70 | {"env_id": "MiniGrid-SimpleCrossingS9N0-v0", "num_crossings": 3}, 71 | ] 72 | 73 | assert_value(case1, generate_config_json("LavaGrid", 3, ["lava_on"], [bool])) 74 | assert_value( 75 | case2, 76 | generate_config_json( 77 | "MiniGrid-SimpleCrossingS9N0-v0", 3, ["num_crossings"], [int], [(1, 4)] 78 | ), 79 | ) 80 | -------------------------------------------------------------------------------- /novgrid_old/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from novgrid.envs.multidoormultikey import * 2 | from 
novgrid.envs.lavagapdoorkey import * -------------------------------------------------------------------------------- /novgrid_old/envs/lavagapdoorkey.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | import numpy as np 4 | 5 | MAXIMUM_SIZE_F = 10 # Gap needs only 4, when door key involved make it 10. 6 | 7 | 8 | class LavaGapDoorKeyEnv(MiniGridEnv): 9 | """ 10 | Environment with a door and key with one wall of lava with a small gap to cross through 11 | sparse reward 12 | """ 13 | def __init__(self, size, obstacle_type=Lava, seed=None): 14 | self.obstacle_type = obstacle_type 15 | self.fixed_env = False 16 | self.simple_reward = False 17 | self.no_door_key = False # True for gap. 18 | 19 | super().__init__( 20 | grid_size=size, 21 | max_steps=MAXIMUM_SIZE_F*size*size, 22 | # Set this to True for maximum speed 23 | see_through_walls=False, 24 | seed=seed 25 | ) 26 | 27 | def _gen_grid(self, width, height): 28 | # Create an empty grid 29 | self.grid = Grid(width, height) 30 | 31 | # Generate the surrounding walls 32 | self.grid.wall_rect(0, 0, width, height) 33 | 34 | # Place a goal in the bottom-right corner 35 | self.put_obj(Goal(), width - 2, height - 2) 36 | 37 | if self.fixed_env: 38 | # Create a vertical splitting wall 39 | splitIdx = width//2 - 1 40 | self.gap_pos = np.array((2,3)) 41 | self.grid.vert_wall(splitIdx+3, 2, width // 3, self.obstacle_type) 42 | doorIdx = height//2 - 1 43 | 44 | else: 45 | # Create a vertical splitting wall 46 | splitIdx = self._rand_int(2, width-2) 47 | # Place the obstacle wall 48 | if splitIdx > width // 3 + 1: 49 | self.gap_pos = np.array(( 50 | self._rand_int(1, width // 3), 51 | self._rand_int(1, height - 2), 52 | )) 53 | self.grid.horz_wall(self.gap_pos[0], self.gap_pos[1], width // 3, self.obstacle_type) 54 | else: 55 | self.gap_pos = np.array(( 56 | self._rand_int(1, width - 2), 57 | 
self._rand_int(1, height // 3), 58 | )) 59 | self.grid.vert_wall(self.gap_pos[0], self.gap_pos[1], height // 3, self.obstacle_type) 60 | doorIdx = self._rand_int(1, height - 2) 61 | # Place a door in the wall 62 | while abs(doorIdx - self.gap_pos[1]) < 2: 63 | doorIdx = self._rand_int(1, height-2) 64 | 65 | 66 | self.grid.vert_wall(splitIdx, 0) 67 | if self.no_door_key: 68 | # Put a hole in the wall 69 | self.grid.set(splitIdx, doorIdx, None) 70 | else: 71 | self.put_obj(Door('yellow', is_locked=True), splitIdx, doorIdx) 72 | if self.fixed_env: 73 | self.put_obj(Key('yellow'), 2, 4) 74 | else: 75 | # Place a yellow key on the left side 76 | self.place_obj( 77 | obj=Key('yellow'), 78 | top=(0, 0), 79 | size=(splitIdx, height) 80 | ) 81 | 82 | # Place the agent at a random position and orientation 83 | # on the left side of the splitting wall 84 | self.place_agent(size=(splitIdx, height)) 85 | 86 | self.mission = "Avoid the lava and use the key to open the door and then get to the goal" 87 | 88 | def _reward(self): 89 | """ 90 | Compute the reward to be given upon success 91 | """ 92 | agent_pos = self.agent_pos 93 | object = self.grid.get(agent_pos[0], agent_pos[1]) 94 | if (object.type == 'lava') and (): 95 | return -1 # Add Negative reward for stepping on Lava. 
96 | 97 | if self.simple_reward: 98 | return 1 99 | else: 100 | return (1 - 0.9 * (self.step_count / self.max_steps)) * 10 101 | 102 | 103 | class LavaShortcutMaze(MiniGridEnv): 104 | """ 105 | Environment with a door and key with one wall of lava with a small gap to cross through 106 | sparse reward 107 | """ 108 | def __init__(self, size, obstacle_type=Lava, seed=None): 109 | self.obstacle_type = obstacle_type 110 | self.fixed_env = True 111 | self.simple_reward = False 112 | 113 | super().__init__( 114 | grid_size=size, 115 | max_steps=MAXIMUM_SIZE_F*size*size, 116 | # Set this to True for maximum speed 117 | see_through_walls=False, 118 | seed=seed 119 | ) 120 | 121 | def _gen_grid(self, width, height): 122 | # Create an empty grid 123 | self.grid = Grid(width, height) 124 | 125 | # Generate the surrounding walls 126 | self.grid.wall_rect(0, 0, width, height) 127 | 128 | # first vertical walls 129 | first_wall_width = 2 130 | splitIdx = width//2 131 | self.grid.vert_wall(first_wall_width, 2, height-3) 132 | 133 | if width > 6: 134 | for extra_wall_pos in range(1,(width-5)//2+1): 135 | if extra_wall_pos % 2 == 0: 136 | self.grid.vert_wall(first_wall_width+extra_wall_pos*2, 2, height - 3) 137 | # Place a goal in the bottom-right corner 138 | # self.put_obj(Goal(), width - 2, height - 2) 139 | else: 140 | self.grid.vert_wall(first_wall_width+extra_wall_pos*2, 0, height - 3) 141 | # Place a goal in the top-right corner 142 | # self.put_obj(Goal(), width - 2, 1) 143 | else: 144 | pass 145 | # Place a goal in the bottom-right corner 146 | self.put_obj(Goal(), width - 2, height - 2) 147 | 148 | # Create a horizontal lava 149 | self.grid.horz_wall(2, height-2, width-4, Lava) 150 | 151 | # Place the agent at a fixed bottom left position 152 | # and random orientation 153 | self.place_agent(top=(0, height-2), 154 | size=(first_wall_width, height)) 155 | 156 | self.mission = "Avoid the lava and use the key to open the door and then get to the goal" 157 | 158 | def 
_reward(self): 159 | """ 160 | Compute the reward to be given upon success 161 | """ 162 | agent_pos = self.agent_pos 163 | object = self.grid.get(agent_pos[0], agent_pos[1]) 164 | if object.type == "lava": 165 | return -1 # Add Negative reward for stepping on Lava. 166 | 167 | if self.simple_reward: 168 | return 1 169 | else: 170 | return (1 - 0.9 * (self.step_count / self.max_steps)) # * 10 171 | 172 | class LavaSafeMaze8x8(LavaShortcutMaze): 173 | def __init__(self): 174 | super().__init__(size=8) 175 | 176 | def _reward(self): 177 | """ 178 | Compute the reward to be given upon success 179 | """ 180 | # agent_pos = self.agent_pos 181 | # object = self.grid.get(agent_pos[0], agent_pos[1]) 182 | # if object.type == "lava": 183 | # return -1 # NO Negative reward for stepping on Lava. 184 | 185 | if self.simple_reward: 186 | return 1 187 | else: 188 | return (1 - 0.9 * (self.step_count / self.max_steps)) # * 10 189 | 190 | def step(self, action, **kwargs): 191 | fwd_pos = self.front_pos 192 | fwd_cell = self.grid.get(*fwd_pos) 193 | obs, reward, done, info = super().step(action) 194 | if done and fwd_cell and fwd_cell.type == 'lava': 195 | self.agent_pos = fwd_pos 196 | obs = self.gen_obs() 197 | done = False 198 | return obs, reward, done, info 199 | 200 | 201 | 202 | class LavaGapDoorKeyEnv5x5(LavaGapDoorKeyEnv): 203 | def __init__(self): 204 | super().__init__(size=5) 205 | 206 | 207 | class LavaGapDoorKeyEnv6x6(LavaGapDoorKeyEnv): 208 | def __init__(self): 209 | super().__init__(size=6) 210 | 211 | 212 | class LavaGapDoorKeyEnv8x8(LavaGapDoorKeyEnv): 213 | def __init__(self): 214 | super().__init__(size=8) 215 | 216 | 217 | class LavaGapDoorKeyEnv16x16(LavaGapDoorKeyEnv): 218 | def __init__(self): 219 | super().__init__(size=16) 220 | 221 | 222 | class LavaShortcutMaze6x6(LavaShortcutMaze): 223 | def __init__(self): 224 | super().__init__(size=6) 225 | 226 | 227 | class LavaShortcutMaze7x7(LavaShortcutMaze): 228 | def __init__(self): 229 | 
super().__init__(size=7) 230 | 231 | 232 | class LavaShortcutMaze8x8(LavaShortcutMaze): 233 | def __init__(self): 234 | super().__init__(size=8) 235 | 236 | 237 | class LavaShortcutMaze9x9(LavaShortcutMaze): 238 | def __init__(self): 239 | super().__init__(size=9) 240 | 241 | 242 | 243 | # register( 244 | # id='MiniGrid-LavaGapDoorKeyEnv5x5-v0', 245 | # entry_point='novgrid.envs:LavaGapDoorKeyEnv5x5' 246 | # ) 247 | # print("hello") 248 | 249 | register( 250 | id='MiniGrid-LavaGapDoorKeyEnv6x6-v0', 251 | entry_point='novgrid.envs:LavaGapDoorKeyEnv6x6' 252 | ) 253 | 254 | register( 255 | id='MiniGrid-LavaGapDoorKeyEnv8x8-v0', 256 | entry_point='novgrid.envs:LavaGapDoorKeyEnv8x8' 257 | ) 258 | 259 | register( 260 | id='MiniGrid-LavaGapDoorKeyEnv16x16-v0', 261 | entry_point='novgrid.envs:LavaGapDoorKeyEnv16x16' 262 | ) 263 | 264 | ###### 265 | 266 | register( 267 | id='MiniGrid-LavaShortcutMaze6x6-v0', 268 | entry_point='novgrid.envs:LavaShortcutMaze6x6' 269 | ) 270 | 271 | register( 272 | id='MiniGrid-LavaShortcutMaze7x7-v0', 273 | entry_point='novgrid.envs:LavaShortcutMaze7x7' 274 | ) 275 | 276 | register( 277 | id='MiniGrid-LavaShortcutMaze8x8-v0', 278 | entry_point='novgrid.envs:LavaShortcutMaze8x8' 279 | ) 280 | 281 | register( 282 | id='MiniGrid-LavaShortcutMaze9x9-v0', 283 | entry_point='novgrid.envs:LavaShortcutMaze9x9' 284 | ) 285 | 286 | register( 287 | id='MiniGrid-LavaSafeMaze8x8-v0', 288 | entry_point='novgrid.envs:LavaSafeMaze8x8' 289 | ) 290 | -------------------------------------------------------------------------------- /novgrid_old/envs/multidoormultikey.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.envs import DoorKeyEnv 2 | from gym_minigrid.register import register 3 | from gym_minigrid.minigrid import Key, Grid, Door, Goal, COLORS 4 | from matplotlib.pyplot import grid 5 | import numpy as np 6 | 7 | 8 | class MultiDoorMultiKeyEnv(DoorKeyEnv): 9 | def __init__(self, size=6, doors=1, 
keys=1, determ=False, seed=13, locked=None): 10 | if (doors > (size - 3)) or (keys > (size - 3)): 11 | raise ValueError("Both doors:{} and keys:{} must be less than size-3:{}".format(doors, keys, size)) 12 | elif doors > 6 or keys > 6: 13 | raise ValueError("Both doors:{} and keys:{} must be less than 6".format(doors, keys)) 14 | self.doors = doors 15 | self.keys = keys 16 | self.seed_value = seed 17 | self.determ = determ 18 | if self.determ: 19 | rand_num_gen = np.random.default_rng(self.seed_value) 20 | self.door_idxs = rand_num_gen.choice(size - 3, size=self.doors, replace=False) + 1 21 | self.key_widths = rand_num_gen.choice(size, size=self.keys) 22 | self.key_heights = rand_num_gen.choice(size, size=self.keys) 23 | self.split_idx = rand_num_gen.integers(low=2, high=size - 2) 24 | super().__init__(size=size) 25 | 26 | def _gen_grid(self, width, height): 27 | # Create an empty grid 28 | self.grid = Grid(width, height) 29 | 30 | # Generate the surrounding walls 31 | self.grid.wall_rect(0, 0, width, height) 32 | 33 | # Place a goal in the bottom-right corner 34 | self.put_obj(Goal(), width - 2, height - 2) 35 | 36 | # Create a vertical splitting wall 37 | if self.determ: 38 | split_idx = self.split_idx 39 | else: 40 | split_idx = self._rand_int(2, width - 2) 41 | self.grid.vert_wall(split_idx, 0) 42 | 43 | # Place the agent at a random position and orientation 44 | # on the left side of the splitting wall 45 | self.place_agent(size=(split_idx, height)) 46 | 47 | ## Place doors and keys 48 | ## Warning: for Python < 3.5 dict order is non-deterministic 49 | colors = list(COLORS.keys()) 50 | rand_num_gen = np.random.default_rng(self.seed_value) 51 | # place_obj drops the object randomly in a rectangle 52 | # put_obj puts an object in a specific place 53 | for door in range(self.doors): 54 | if self.determ: 55 | door_idx = self.door_idxs[door] 56 | else: 57 | door_idx = None 58 | while not door_idx or isinstance(self.grid.get(split_idx, door_idx), Door): 59 | 
door_idx = rand_num_gen.choice(height - 3) + 1 60 | self.put_obj(Door(colors[door], is_locked=True), split_idx, door_idx) 61 | 62 | for key in range(self.keys): 63 | if self.determ: 64 | self.put_obj(Key(colors[key]), self.key_widths[key], self.key_heights[key]) 65 | self.place_obj(obj=Key(colors[key]), top=(0, 0), size=(split_idx, height)) 66 | 67 | self.mission = "use the key to open the same color door and then get to the goal" 68 | 69 | class DoorMultiKeyEnv5x5(DoorKeyEnv): 70 | def __init__(self): 71 | super().__init__(size=5, doors=2, keys=2) 72 | 73 | class DoorMultiKeyEnv6x6(DoorKeyEnv): 74 | def __init__(self): 75 | super().__init__(size=6, doors=2, keys=2) 76 | 77 | class DoorMultiKeyEnv16x16(DoorKeyEnv): 78 | def __init__(self): 79 | super().__init__(size=16, doors=2, keys=2) 80 | 81 | register( 82 | id='MiniGrid-DoorMultiKey-5x5-v0', 83 | entry_point='novgrid.envs:DoorMultiKeyEnvEnv5x5' 84 | ) 85 | 86 | register( 87 | id='MiniGrid-DoorMultiKey-6x6-v0', 88 | entry_point='novgrid.envs:DoorMultiKeyEnv6x6' 89 | ) 90 | 91 | register( 92 | id='MiniGrid-DoorMultiKey-16x16-v0', 93 | entry_point='novgrid.envs:DoorMultiKeyEnv16x16' 94 | ) 95 | -------------------------------------------------------------------------------- /novgrid_old/novelty_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eilab-gt/NovGrid/5873d2148b246ba9433307e8791ab794c0d7ca57/novgrid_old/novelty_generation/__init__.py -------------------------------------------------------------------------------- /novgrid_old/novelty_generation/novelty_objs.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import Key, Door 2 | 3 | 4 | class ColorDoor(Door): 5 | """ 6 | A Door instance where the key color can be specified and doesn't have to match the door 7 | """ 8 | def __init__(self, color, is_open=False, is_locked=False, key_color=None): 9 | 
super().__init__(color, is_open, is_locked) 10 | self.is_open = is_open 11 | self.is_locked = is_locked 12 | if key_color: 13 | self.key_color = key_color 14 | else: 15 | self.key_color = color 16 | 17 | def toggle(self, env, pos): 18 | # If the player has the right key to open the door 19 | if self.is_locked: 20 | if isinstance(env.carrying, Key) and env.carrying.color == self.key_color: 21 | self.is_locked = False 22 | self.is_open = True 23 | return True 24 | return False 25 | 26 | self.is_open = not self.is_open 27 | return True 28 | 29 | 30 | class MultiKeyDoor(Door): 31 | """ 32 | A Door instance where multiple keys are required to unlock the door 33 | """ 34 | def __init__(self, color, is_open=False, is_locked=False, key_colors=None): 35 | super().__init__(color, is_open, is_locked) 36 | self.is_open = is_open 37 | self.is_locked = is_locked 38 | if key_colors: 39 | self.key_colors = key_colors 40 | else: 41 | self.key_colors = color 42 | 43 | 44 | def toggle(self, env, pos): 45 | if self.is_locked: 46 | if isinstance(env.carrying, Key) and env.carrying.color in self.key_colors: 47 | self.key_colors.remove(env.carrying.color) 48 | if len(self.key_colors) == 0: 49 | self.is_locked = False 50 | self.is_open = True 51 | return True 52 | return False 53 | 54 | self.is_open = not self.is_open 55 | return True -------------------------------------------------------------------------------- /novgrid_old/novelty_generation/novelty_wrappers.py: -------------------------------------------------------------------------------- 1 | # change the self.mission 2 | # you should be able to specify the exact novelty AND that there should be a random novelty 3 | import abc 4 | import gym 5 | import numpy as np 6 | 7 | from .novelty_objs import ColorDoor, MultiKeyDoor 8 | from gym_minigrid.minigrid import Key, Grid, Door, Goal 9 | 10 | 11 | class NoveltyWrapper(gym.core.Wrapper): 12 | """ 13 | Wrapper to modify the environment according to novelty ontology at a certain point 14 
| If novelty_episode = 0 (default) then there is no novelty 15 | This make the assumption--which is valid for all standard Minigrid environments as of 2021 16 | that MiniGrid environments do not ever overload the `reset()` gym function. So, we will assume 17 | that all relevant novelties are to be implemented in the `_post_novelty_gen_grid` function, 18 | which is called by `_post_novelty_reset` after the novelty episode is reached. 19 | """ 20 | 21 | def __init__(self, env, novelty_episode=-1, novelty_step=-1): 22 | super().__init__(env) 23 | ## ensure that one and only one of novelty_step or novelty_episode is used 24 | assert novelty_episode > 0 or novelty_step > 0 25 | assert not (novelty_episode > 0 and novelty_step > 0) 26 | self.novelty_episode = novelty_episode 27 | self.novelty_step = novelty_step 28 | if novelty_step != -1: 29 | self.novelty_flag = 'step' 30 | else: 31 | self.novelty_flag = 'episode' 32 | self.num_episodes = 0 33 | self.num_steps = 0 34 | self.post_novelty = False 35 | 36 | def reset(self, **kwargs): 37 | # don't count resets that have no steps 38 | if self.unwrapped.step_count: 39 | self.num_episodes += 1 40 | self.num_steps += self.unwrapped.step_count 41 | # if episode matches, inject novelty and record step size 42 | if self.num_episodes >= self.novelty_episode: 43 | if self.post_novelty is False: 44 | print('############################') 45 | print('##### Novelty Injected #####') 46 | print(f'##### Step {self.num_steps} #####') 47 | print('############################') 48 | self.novelty_step = self.num_steps 49 | self.post_novelty = True 50 | # self.env.reset(**kwargs) 51 | return self._post_novelty_reset(**kwargs) 52 | else: 53 | return self.env.reset(**kwargs) 54 | 55 | def _post_novelty_reset(self, **kwargs): 56 | # Current position and direction of the agent 57 | #todo all this should be unwrapped 58 | self.env.agent_pos = None 59 | self.env.agent_dir = None 60 | 61 | # Generate a new random grid at the start of each episode 62 
| # To keep the same grid for each episode, call env.seed() with 63 | # the same seed before calling env.reset() 64 | self._post_novelty_gen_grid(self.width, self.height, **kwargs) 65 | 66 | # These fields should be defined by _gen_grid 67 | assert self.env.agent_pos is not None 68 | assert self.env.agent_dir is not None 69 | 70 | # Check that the agent doesn't overlap with an object 71 | start_cell = self.env.grid.get(*self.env.agent_pos) 72 | assert start_cell is None or start_cell.can_overlap() 73 | 74 | # Item picked up, being carried, initially nothing 75 | self.env.carrying = None 76 | 77 | # Step count since episode start 78 | self.env.step_count = 0 79 | 80 | # Return first observation 81 | obs = self.env.gen_obs() 82 | return obs 83 | 84 | # @abc.abstractmethod 85 | def _post_novelty_gen_grid(self, width, height): 86 | """ 87 | This is the main function where you implement the novelty 88 | """ 89 | return self.unwrapped._gen_grid(width, height) 90 | # raise NotImplementedError 91 | 92 | def _rand_int(self, low, high): 93 | return self.env.np_random.randint(low, high) 94 | 95 | 96 | class DoorKeyChange(NoveltyWrapper): 97 | 98 | def __init__(self, env, novelty_episode): 99 | super().__init__(env, novelty_episode) 100 | 101 | def _post_novelty_gen_grid(self, width, height): 102 | # Create an empty grid 103 | self.env.grid = Grid(width, height) 104 | 105 | # Generate the surrounding walls 106 | self.env.grid.wall_rect(0, 0, width, height) 107 | 108 | # Place a goal in the bottom-right corner 109 | self.env.put_obj(Goal(), width - 2, height - 2) 110 | 111 | # Create a vertical splitting wall 112 | splitIdx = self._rand_int(2, width - 2) 113 | self.env.grid.vert_wall(splitIdx, 0) 114 | 115 | # Place the agent at a random position and orientation 116 | # on the left side of the splitting wall 117 | self.env.place_agent(size=(splitIdx, height)) 118 | 119 | # Place a door in the wall 120 | doorIdx = self._rand_int(1, width-2) 121 | # Yellow door object that will 
open when toggled with a blue key 122 | self.env.put_obj(ColorDoor('yellow', is_locked=True, key_color='blue'), splitIdx, doorIdx) 123 | 124 | # Place a yellow key on the left side 125 | self.env.place_obj( 126 | obj=Key('yellow'), 127 | top=(0, 0), 128 | size=(splitIdx, height) 129 | ) 130 | 131 | # Place a blue key on the left side 132 | self.env.place_obj( 133 | obj=Key('blue'), 134 | top=(0, 0), 135 | size=(splitIdx, height) 136 | ) 137 | 138 | self.env.mission = "use different color key to open the door and then get to the goal" 139 | 140 | 141 | class DoorLockToggle(NoveltyWrapper): 142 | 143 | def __init__(self, env, novelty_episode): 144 | super().__init__(env, novelty_episode) 145 | 146 | def _post_novelty_gen_grid(self, width, height): 147 | # Create an empty grid 148 | self.env.grid = Grid(width, height) 149 | 150 | # Generate the surrounding walls 151 | self.env.grid.wall_rect(0, 0, width, height) 152 | 153 | # Place a goal in the bottom-right corner 154 | self.env.put_obj(Goal(), width - 2, height - 2) 155 | 156 | # Create a vertical splitting wall 157 | splitIdx = self._rand_int(2, width - 2) 158 | self.env.grid.vert_wall(splitIdx, 0) 159 | 160 | # Place the agent at a random position and orientation 161 | # on the left side of the splitting wall 162 | self.env.place_agent(size=(splitIdx, height)) 163 | 164 | # Place a door in the wall 165 | doorIdx = self._rand_int(1, width - 2) 166 | # Yellow door object that is already unlocked 167 | self.env.put_obj(Door('yellow', is_locked=False), splitIdx, doorIdx) 168 | 169 | # Place a yellow key on the left side 170 | self.env.place_obj( 171 | obj=Key('yellow'), 172 | top=(0, 0), 173 | size=(splitIdx, height) 174 | ) 175 | 176 | self.env.mission = "go through the unlocked door and then get to the goal" 177 | 178 | 179 | class DoorNumKeys(NoveltyWrapper): 180 | 181 | def __init__(self, env, novelty_episode): 182 | super().__init__(env, novelty_episode) 183 | 184 | def _post_novelty_gen_grid(self, width, 
height): 185 | # Create an empty grid 186 | self.env.grid = Grid(width, height) 187 | 188 | # Generate the surrounding walls 189 | self.env.grid.wall_rect(0, 0, width, height) 190 | 191 | # Place a goal in the bottom-right corner 192 | self.env.put_obj(Goal(), width - 2, height - 2) 193 | 194 | # Create a vertical splitting wall 195 | splitIdx = self._rand_int(3, width - 2) 196 | self.env.grid.vert_wall(splitIdx, 0) 197 | 198 | # Place the agent at a random position and orientation 199 | # on the left side of the splitting wall 200 | self.env.place_agent(size=(splitIdx, height)) 201 | 202 | # Place a door in the wall 203 | doorIdx = self._rand_int(1, width-2) 204 | # Yellow door that requires a yellow key and a blue key to be opened 205 | self.env.put_obj(MultiKeyDoor( 206 | 'yellow', 207 | is_locked=True, 208 | key_colors=['yellow', 'blue']), 209 | splitIdx, doorIdx) 210 | 211 | # Place a yellow key on the left side 212 | self.env.place_obj( 213 | obj=Key('yellow'), 214 | top=(0, 0), 215 | size=(splitIdx, height) 216 | ) 217 | 218 | # Place a blue key on the left side 219 | self.env.place_obj( 220 | obj=Key('blue'), 221 | top=(0, 0), 222 | size=(splitIdx, height) 223 | ) 224 | 225 | self.env.mission = "use two keys to open the door and then get to the goal" 226 | 227 | 228 | class GoalLocationChange(NoveltyWrapper): 229 | 230 | def __init__(self, env, novelty_episode): 231 | super().__init__(env, novelty_episode) 232 | 233 | def _post_novelty_gen_grid(self, width, height): 234 | # Create an empty grid 235 | self.env.grid = Grid(width, height) 236 | 237 | # Generate the surrounding walls 238 | self.env.grid.wall_rect(0, 0, width, height) 239 | 240 | # Changes the location of the goal from the bottom-right corner to the top-right corner 241 | self.env.put_obj(Goal(), width - 2, 1) 242 | 243 | # Create a vertical splitting wall 244 | splitIdx = self._rand_int(2, width-2) 245 | self.env.grid.vert_wall(splitIdx, 0) 246 | 247 | # Place the agent at a random position and 
orientation 248 | # on the left side of the splitting wall 249 | self.env.place_agent(size=(splitIdx, height)) 250 | 251 | # Place a door in the wall 252 | doorIdx = self._rand_int(1, width-2) 253 | self.env.put_obj(Door('yellow', is_locked=True), splitIdx, doorIdx) 254 | 255 | # Place a yellow key on the left side 256 | self.env.place_obj( 257 | obj=Key('yellow'), 258 | top=(0, 0), 259 | size=(splitIdx, height) 260 | ) 261 | 262 | self.env.mission = "use the key to open the door and then get to the goal whose location has changed" 263 | 264 | 265 | class ImperviousToLava(NoveltyWrapper): 266 | 267 | def __init__(self, env, novelty_episode): 268 | super().__init__(env, novelty_episode) 269 | 270 | # def reset(self, **kwargs): 271 | # self.num_episodes += 1 272 | # return self.env.reset(**kwargs) 273 | 274 | def step(self, action, **kwargs): 275 | if self.post_novelty: 276 | fwd_pos = self.env.front_pos 277 | fwd_cell = self.env.grid.get(*fwd_pos) 278 | obs, reward, done, info = self.env.step(action, **kwargs) 279 | if done and fwd_cell and fwd_cell.type == 'lava': 280 | self.env.agent_pos = fwd_pos 281 | obs = self.env.gen_obs()['image'] 282 | done = False 283 | return obs, reward, done, info 284 | return self.env.step(action, **kwargs) 285 | 286 | 287 | class LavaHurts(NoveltyWrapper): 288 | """ 289 | for an environment where lava doesn't hurt already 290 | """ 291 | def __init__(self, env, novelty_episode): 292 | super().__init__(env, novelty_episode) 293 | 294 | # def reset(self, **kwargs): 295 | # self.num_episodes += 1 296 | # return self.env.reset(**kwargs) 297 | 298 | def step(self, action, **kwargs): 299 | if self.post_novelty: 300 | fwd_pos = self.env.front_pos 301 | fwd_cell = self.env.grid.get(*fwd_pos) 302 | obs, reward, done, info = self.env.step(action, **kwargs) 303 | if fwd_cell and fwd_cell.type == 'lava': 304 | self.env.agent_pos = fwd_pos 305 | obs = self.env.gen_obs()['image'] 306 | done = True 307 | return obs, reward, done, info 308 | return 
self.env.step(action, **kwargs) 309 | 310 | 311 | class ForwardMovementSpeed(NoveltyWrapper): 312 | 313 | def __init__(self, env, novelty_episode): 314 | super().__init__(env, novelty_episode) 315 | 316 | # def reset(self, **kwargs): 317 | # self.num_episodes += 1 318 | # return self.env.reset(**kwargs) 319 | 320 | def step(self, action, **kwargs): 321 | if self.post_novelty: 322 | if action == self.env.actions.forward: 323 | obs, reward, done, info = self.env.step(action, **kwargs) 324 | if done: 325 | return obs, reward, done, info 326 | self.env.step_count -= 1 327 | return self.env.step(action, **kwargs) 328 | 329 | 330 | class ActionReptition(NoveltyWrapper): 331 | 332 | def __init__(self, env, novelty_episode): 333 | super().__init__(env, novelty_episode) 334 | self.prev_action = None 335 | 336 | # def reset(self, **kwargs): 337 | # self.num_episodes += 1 338 | # return self.env.reset(**kwargs) 339 | 340 | def step(self, action, **kwargs): 341 | if self.post_novelty: 342 | if action != self.prev_action: 343 | self.prev_action = action 344 | return self.env.step(self.env.actions.done) 345 | self.prev_action = None 346 | return self.env.step(action, **kwargs) 347 | 348 | 349 | class ActionRadius(NoveltyWrapper): 350 | 351 | def __init__(self, env, novelty_episode): 352 | super().__init__(env, novelty_episode) 353 | 354 | # def reset(self, **kwargs): 355 | # self.num_episodes += 1 356 | # return self.env.reset(**kwargs) 357 | 358 | def step(self, action, **kwargs): 359 | if self.post_novelty: 360 | obs, reward, done, info = self.env.step(action, **kwargs) 361 | if action == self.env.actions.pickup and self.env.carrying is None: 362 | agent_pos = self.env.agent_pos 363 | self.env.step(self.env.actions.forward, **kwargs) 364 | self.env.step(action, **kwargs) 365 | self.env.agent_pos = agent_pos 366 | self.env.step_count -= 2 367 | obs = self.env.gen_obs() 368 | return obs, reward, done, info 369 | return self.env.step(action, **kwargs) 370 | 371 | 372 | class 
class ColorRestriction(NoveltyWrapper):
    """Novelty wrapper that makes the yellow key useless after novelty onset.

    Post-novelty, the grid is regenerated with a vertical wall holding two
    locked doors (yellow and blue) and both matching keys on the agent's
    side; any attempt to pick up the yellow key is converted into the no-op
    ``done`` action, so only the blue key/door route can solve the task.
    """

    def __init__(self, env, novelty_episode):
        super().__init__(env, novelty_episode)

    def _post_novelty_gen_grid(self, width, height):
        """Build the two-door / two-key layout used after novelty injection."""
        # Create an empty grid and generate the surrounding walls
        self.env.grid = Grid(width, height)
        self.env.grid.wall_rect(0, 0, width, height)

        # Place a goal in the bottom-right corner
        self.env.put_obj(Goal(), width - 2, height - 2)

        # Create a vertical splitting wall
        splitIdx = self._rand_int(2, width - 2)
        self.env.grid.vert_wall(splitIdx, 0)

        # Place the agent at a random position and orientation
        # on the left side of the splitting wall
        self.env.place_agent(size=(splitIdx, height))

        # Place the yellow door in the wall. The door's second coordinate is a
        # row index into a wall of length ``height``, so it must be drawn from
        # [1, height - 2); the previous code used ``width - 2``, which is only
        # correct for square grids and can index the border (or beyond) otherwise.
        doorIdx = self._rand_int(1, height - 2)
        self.env.put_obj(Door('yellow', is_locked=True), splitIdx, doorIdx)

        # Place the blue door in a distinct cell of the same wall
        doorIdx = self._rand_int(1, height - 2)
        while isinstance(self.env.grid.get(splitIdx, doorIdx), Door):
            doorIdx = self._rand_int(1, height - 2)
        self.env.put_obj(Door('blue', is_locked=True), splitIdx, doorIdx)

        # Place a yellow key on the left side
        self.env.place_obj(
            obj=Key('yellow'),
            top=(0, 0),
            size=(splitIdx, height)
        )

        # Place a blue key on the left side
        self.env.place_obj(
            obj=Key('blue'),
            top=(0, 0),
            size=(splitIdx, height)
        )

        self.env.mission = "use blue key to open the blue door and then get to the goal"

    def step(self, action, **kwargs):
        """Step the env; post-novelty, yellow-key pickups are refused.

        When the agent faces the yellow key and issues ``pickup``, the action
        is replaced with the no-op ``done`` action so the key stays in place.
        """
        if self.post_novelty:
            if action == self.env.actions.pickup:
                fwd_pos = self.env.front_pos
                fwd_cell = self.env.grid.get(*fwd_pos)
                if fwd_cell and fwd_cell.can_pickup() and fwd_cell.color == 'yellow':
                    return self.env.step(self.env.actions.done, **kwargs)
        return self.env.step(action, **kwargs)
class MinigridCNN(BaseFeaturesExtractor):
    """
    CNN feature extractor for minigrid observations.

    :param observation_space: channels-first image observation space
    :param features_dim: size of the extracted feature vector
        (number of units in the final linear layer)
    """

    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 64):
        super().__init__(observation_space, features_dim)
        # Observations are assumed CxHxW (channels first); any re-ordering
        # is handled upstream by preprocessing or a wrapper.
        in_channels = observation_space.shape[0]
        conv_out_channels = 64
        self.cnn = nn.Sequential(
            nn.Conv2d(in_channels, 16, (2, 2)),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
            nn.Conv2d(16, 32, (2, 2)),
            nn.ReLU(),
            nn.Conv2d(32, conv_out_channels, (2, 2)),
            nn.ReLU(),
            nn.Flatten(),
        )
        # Discover the flattened size with a single dummy forward pass
        with th.no_grad():
            sample = th.as_tensor(observation_space.sample()[None]).float()
            n_flatten = self.cnn(sample).shape[1]

        self.linear = nn.Sequential(nn.Linear(n_flatten, features_dim), nn.ReLU())

    def forward(self, observations: th.Tensor) -> th.Tensor:
        return self.linear(self.cnn(observations))
def make_env(env_name, wrappers=None, wrapper_args=None, novelty_episode=-1):
    '''
    Build a zero-argument factory that creates and wraps an environment.
    The vectorization helpers expect a list of such factories.

    Parameters
    ----------
    env_name : str
        Name of the environment
    wrappers : list of wrapper classes, optional
        Wrapper classes applied to the environment, in order
    wrapper_args : list of dicts, optional
        One kwargs dict per wrapper, matched by index
    novelty_episode : int
        Episode number for novelty generation. To be deprecated

    Returns
    -------
    callable
        Zero-argument function returning the fully wrapped environment.
    '''
    if wrappers is None:
        wrappers = []
    # Validate up front (fail fast) and normalize to one kwargs dict per
    # wrapper, which lets a single loop below handle both call shapes that
    # were previously duplicated across two nearly identical branches.
    if wrapper_args is not None:
        assert len(wrapper_args) == len(wrappers)
        per_wrapper_kwargs = wrapper_args
    else:
        per_wrapper_kwargs = [{} for _ in wrappers]

    def _init():
        env = gym.make(env_name)
        for wrapper, kwargs in zip(wrappers, per_wrapper_kwargs):
            if issubclass(wrapper, NoveltyWrapper):
                # NoveltyWrapper still takes novelty_episode directly
                print("DEPRECATION WARNING: NoveltyWrapper should be redesigned with novelty_episode as a wrapper arg")
                env = wrapper(env, novelty_episode=novelty_episode, **kwargs)
            else:
                env = wrapper(env, **kwargs)
        return env
    return _init
def getparser(inputs=None):
    """
    Build the experiment argument parser.

    Reminder: every option must be declared here for a config file to be
    able to override it.
    Precedence: command line > environment variables > config file values > defaults

    Parameters
    ----------
    inputs : list of str, optional
        Argument strings to parse instead of ``sys.argv`` (useful in tests).
    """
    p = configargparse.ArgParser(default_config_files=['default.ini'])
    p.add('--exp_config', required=False, is_config_file=True, help='config file path for the experiment')
    p.add('-t', '--total_timesteps', type=int, default=2500000, help='total timesteps per experiment')
    p.add('-e', '--env', type=str, default='MiniGrid-DoorKey-8x8-v0', help='Core environment')
    p.add('-s', '--saves_logs', type=str, default='novgrid_logs', help='where to save logs and models')
    p.add('--device', type=str, default='', help='device. code assumes empty means to autocheck')
    p.add('--load_model', type=str, default='', help='model to load. empty string learns from scratch')  # models/best_model.zip')
    p.add('--num_exp', type=int, default=1, help='number of learning experiments per run')
    p.add('-w', '--wandb_track', default=False, action='store_true', help='whether or not to set up as a wandb run')
    p.add('--learning_rate', type=float, default=2.5e-4, help='Learning rate for optimization')
    p.add('--num_workers', type=int, default=1, help='number of learning workers, and therefore environments')
    p.add('--seed', type=int, default=13, help='seed for randomness')
    p.add('--debug', default=False, action='store_true')
    p.add('--novelty_wrapper', type=str, default='', help='novelty to inject into environment')
    # NOTE(review): default.ini sets ``novelty-step``, which matches no option
    # declared here (the option is ``novelty_episode``) — confirm intended key.
    p.add('--novelty_episode', type=int, default=10000, help='episode in which novelty is injected')
    p.add('--eval_interval', type=int, default=1000, help='how many steps between evaluations')
    p.add('--log_interval', type=int, default=10, help='how many steps between logging')

    if inputs is None:
        parsed_args = p.parse_args()
    else:
        parsed_args = p.parse_args(inputs)
    print(parsed_args)
    return parsed_args
from setuptools import setup, find_packages

import glob

# Package metadata and install configuration for novgrid.
setup(
    name='novgrid',
    version='0.0.2',
    keywords='novelty, grid, memory, environment, agent, rl, openaigym, openai-gym, gym, gymnasium',
    url='https://github.com/eilab-gt/NovGrid',
    description='A novelty experimentation wrapper for minigrid',
    # find_packages picks up the subpackages (novgrid.envs, novgrid.env_configs);
    # the previous hard-coded ['novgrid'] list silently omitted them, producing
    # a broken install.
    packages=find_packages(include=['novgrid', 'novgrid.*']),
    install_requires=[
        'numpy>=1.15.0',
        'gymnasium',
        'minigrid',
        'stable_baselines3',
    ],
    # NOTE(review): data_files as a bare list of paths installs into the
    # default location; consider (target_dir, [files]) pairs or package_data
    # so the JSON configs land inside the installed package.
    data_files=glob.glob('novgrid/env_configs/json/*.json')
)