├── figures ├── Unlock.png ├── empty-env.png ├── fetch-env.png ├── DistShift1.png ├── DistShift2.png ├── multi-room.gif ├── UnlockPickup.png ├── door-key-env.png ├── four-rooms-env.png ├── gotodoor-6x6.mp4 ├── gotodoor-6x6.png ├── KeyCorridorS3R1.png ├── KeyCorridorS3R2.png ├── KeyCorridorS3R3.png ├── KeyCorridorS4R3.png ├── KeyCorridorS5R3.png ├── KeyCorridorS6R3.png ├── LavaCrossingS9N1.png ├── LavaCrossingS9N2.png ├── LavaCrossingS9N3.png ├── BlockedUnlockPickup.png ├── LavaCrossingS11N5.png ├── ObstructedMaze-1Dl.png ├── ObstructedMaze-1Dlh.png ├── ObstructedMaze-1Q.png ├── ObstructedMaze-2Dl.png ├── ObstructedMaze-2Dlh.png ├── ObstructedMaze-2Q.png ├── ObstructedMaze-4Q.png ├── SimpleCrossingS11N5.png ├── SimpleCrossingS9N1.png ├── SimpleCrossingS9N2.png ├── SimpleCrossingS9N3.png ├── door-key-curriculum.gif ├── dynamic_obstacles.gif ├── ObstructedMaze-1Dlhb.png └── ObstructedMaze-2Dlhb.png ├── .gitignore ├── .travis.yml ├── gym_minigrid ├── __init__.py ├── register.py ├── envs │ ├── __init__.py │ ├── unlock.py │ ├── unlockpickup.py │ ├── blockedunlockpickup.py │ ├── distshift.py │ ├── doorkey.py │ ├── empty.py │ ├── playground_v0.py │ ├── redbluedoors.py │ ├── fourrooms.py │ ├── gotoobject.py │ ├── fetch.py │ ├── gotodoor.py │ ├── keycorridor.py │ ├── lockedroom.py │ ├── putnear.py │ ├── dynamicobstacles.py │ ├── memory.py │ ├── crossing.py │ ├── obstructedmaze.py │ └── multiroom.py ├── rendering.py ├── wrappers.py ├── roomgrid.py └── minigrid.py ├── setup.py ├── LICENSE ├── manual_control.py ├── run_tests.py └── README.md /figures/Unlock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/Unlock.png -------------------------------------------------------------------------------- /figures/empty-env.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/empty-env.png -------------------------------------------------------------------------------- /figures/fetch-env.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/fetch-env.png -------------------------------------------------------------------------------- /figures/DistShift1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/DistShift1.png -------------------------------------------------------------------------------- /figures/DistShift2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/DistShift2.png -------------------------------------------------------------------------------- /figures/multi-room.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/multi-room.gif -------------------------------------------------------------------------------- /figures/UnlockPickup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/UnlockPickup.png -------------------------------------------------------------------------------- /figures/door-key-env.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/door-key-env.png -------------------------------------------------------------------------------- /figures/four-rooms-env.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/four-rooms-env.png 
-------------------------------------------------------------------------------- /figures/gotodoor-6x6.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/gotodoor-6x6.mp4 -------------------------------------------------------------------------------- /figures/gotodoor-6x6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/gotodoor-6x6.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *__pycache__ 3 | *egg-info 4 | trained_models 5 | 6 | # PyPI 7 | build/* 8 | dist/* 9 | -------------------------------------------------------------------------------- /figures/KeyCorridorS3R1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/KeyCorridorS3R1.png -------------------------------------------------------------------------------- /figures/KeyCorridorS3R2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/KeyCorridorS3R2.png -------------------------------------------------------------------------------- /figures/KeyCorridorS3R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/KeyCorridorS3R3.png -------------------------------------------------------------------------------- /figures/KeyCorridorS4R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/KeyCorridorS4R3.png 
-------------------------------------------------------------------------------- /figures/KeyCorridorS5R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/KeyCorridorS5R3.png -------------------------------------------------------------------------------- /figures/KeyCorridorS6R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/KeyCorridorS6R3.png -------------------------------------------------------------------------------- /figures/LavaCrossingS9N1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/LavaCrossingS9N1.png -------------------------------------------------------------------------------- /figures/LavaCrossingS9N2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/LavaCrossingS9N2.png -------------------------------------------------------------------------------- /figures/LavaCrossingS9N3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/LavaCrossingS9N3.png -------------------------------------------------------------------------------- /figures/BlockedUnlockPickup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/BlockedUnlockPickup.png -------------------------------------------------------------------------------- /figures/LavaCrossingS11N5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/LavaCrossingS11N5.png 
-------------------------------------------------------------------------------- /figures/ObstructedMaze-1Dl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-1Dl.png -------------------------------------------------------------------------------- /figures/ObstructedMaze-1Dlh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-1Dlh.png -------------------------------------------------------------------------------- /figures/ObstructedMaze-1Q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-1Q.png -------------------------------------------------------------------------------- /figures/ObstructedMaze-2Dl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-2Dl.png -------------------------------------------------------------------------------- /figures/ObstructedMaze-2Dlh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-2Dlh.png -------------------------------------------------------------------------------- /figures/ObstructedMaze-2Q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-2Q.png -------------------------------------------------------------------------------- /figures/ObstructedMaze-4Q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-4Q.png 
-------------------------------------------------------------------------------- /figures/SimpleCrossingS11N5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/SimpleCrossingS11N5.png -------------------------------------------------------------------------------- /figures/SimpleCrossingS9N1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/SimpleCrossingS9N1.png -------------------------------------------------------------------------------- /figures/SimpleCrossingS9N2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/SimpleCrossingS9N2.png -------------------------------------------------------------------------------- /figures/SimpleCrossingS9N3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/SimpleCrossingS9N3.png -------------------------------------------------------------------------------- /figures/door-key-curriculum.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/door-key-curriculum.gif -------------------------------------------------------------------------------- /figures/dynamic_obstacles.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/dynamic_obstacles.gif -------------------------------------------------------------------------------- /figures/ObstructedMaze-1Dlhb.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-1Dlhb.png -------------------------------------------------------------------------------- /figures/ObstructedMaze-2Dlhb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-2Dlhb.png -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | 5 | # command to install dependencies 6 | install: 7 | - pip3 install -e . 8 | 9 | # command to run tests 10 | script: ./run_tests.py 11 | -------------------------------------------------------------------------------- /gym_minigrid/__init__.py: -------------------------------------------------------------------------------- 1 | # Import the envs module so that envs register themselves 2 | import gym_minigrid.envs 3 | 4 | # Import wrappers so it's accessible when installing with pip 5 | import gym_minigrid.wrappers 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='gym_minigrid', 5 | version='0.0.5', 6 | keywords='memory, environment, agent, rl, openaigym, openai-gym, gym', 7 | url='https://github.com/maximecb/gym-minigrid', 8 | description='Minimalistic gridworld package for OpenAI Gym', 9 | packages=['gym_minigrid', 'gym_minigrid.envs'], 10 | install_requires=[ 11 | 'gym>=0.9.6', 12 | 'numpy>=1.15.0', 13 | 'pyqt5>=5.10.1' 14 | ] 15 | ) 16 | -------------------------------------------------------------------------------- /gym_minigrid/register.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register as 
gym_register 2 | 3 | env_list = [] 4 | 5 | def register( 6 | id, 7 | entry_point, 8 | reward_threshold=0.95 9 | ): 10 | assert id.startswith("MiniGrid-") 11 | assert id not in env_list 12 | 13 | # Register the environment with OpenAI gym 14 | gym_register( 15 | id=id, 16 | entry_point=entry_point, 17 | reward_threshold=reward_threshold 18 | ) 19 | 20 | # Add the environment to the set 21 | env_list.append(id) 22 | -------------------------------------------------------------------------------- /gym_minigrid/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.envs.empty import * 2 | from gym_minigrid.envs.doorkey import * 3 | from gym_minigrid.envs.multiroom import * 4 | from gym_minigrid.envs.fetch import * 5 | from gym_minigrid.envs.gotoobject import * 6 | from gym_minigrid.envs.gotodoor import * 7 | from gym_minigrid.envs.putnear import * 8 | from gym_minigrid.envs.lockedroom import * 9 | from gym_minigrid.envs.keycorridor import * 10 | from gym_minigrid.envs.unlock import * 11 | from gym_minigrid.envs.unlockpickup import * 12 | from gym_minigrid.envs.blockedunlockpickup import * 13 | from gym_minigrid.envs.playground_v0 import * 14 | from gym_minigrid.envs.redbluedoors import * 15 | from gym_minigrid.envs.obstructedmaze import * 16 | from gym_minigrid.envs.memory import * 17 | from gym_minigrid.envs.fourrooms import * 18 | from gym_minigrid.envs.crossing import * 19 | from gym_minigrid.envs.dynamicobstacles import * 20 | from gym_minigrid.envs.distshift import * 21 | -------------------------------------------------------------------------------- /gym_minigrid/envs/unlock.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import Ball 2 | from gym_minigrid.roomgrid import RoomGrid 3 | from gym_minigrid.register import register 4 | 5 | class Unlock(RoomGrid): 6 | """ 7 | Unlock a door 8 | """ 9 | 10 | def __init__(self, seed=None): 
11 | room_size = 6 12 | super().__init__( 13 | num_rows=1, 14 | num_cols=2, 15 | room_size=room_size, 16 | max_steps=8*room_size**2, 17 | seed=seed 18 | ) 19 | 20 | def _gen_grid(self, width, height): 21 | super()._gen_grid(width, height) 22 | 23 | # Make sure the two rooms are directly connected by a locked door 24 | door, _ = self.add_door(0, 0, 0, locked=True) 25 | # Add a key to unlock the door 26 | self.add_object(0, 0, 'key', door.color) 27 | 28 | self.place_agent(0, 0) 29 | 30 | self.door = door 31 | self.mission = "open the door" 32 | 33 | def step(self, action): 34 | obs, reward, done, info = super().step(action) 35 | 36 | if action == self.actions.toggle: 37 | if self.door.is_open: 38 | reward = self._reward() 39 | done = True 40 | 41 | return obs, reward, done, info 42 | 43 | register( 44 | id='MiniGrid-Unlock-v0', 45 | entry_point='gym_minigrid.envs:Unlock' 46 | ) 47 | -------------------------------------------------------------------------------- /gym_minigrid/envs/unlockpickup.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import Ball 2 | from gym_minigrid.roomgrid import RoomGrid 3 | from gym_minigrid.register import register 4 | 5 | class UnlockPickup(RoomGrid): 6 | """ 7 | Unlock a door, then pick up a box in another room 8 | """ 9 | 10 | def __init__(self, seed=None): 11 | room_size = 6 12 | super().__init__( 13 | num_rows=1, 14 | num_cols=2, 15 | room_size=room_size, 16 | max_steps=8*room_size**2, 17 | seed=seed 18 | ) 19 | 20 | def _gen_grid(self, width, height): 21 | super()._gen_grid(width, height) 22 | 23 | # Add a box to the room on the right 24 | obj, _ = self.add_object(1, 0, kind="box") 25 | # Make sure the two rooms are directly connected by a locked door 26 | door, _ = self.add_door(0, 0, 0, locked=True) 27 | # Add a key to unlock the door 28 | self.add_object(0, 0, 'key', door.color) 29 | 30 | self.place_agent(0, 0) 31 | 32 | self.obj = obj 33 | self.mission = "pick 
up the %s %s" % (obj.color, obj.type) 34 | 35 | def step(self, action): 36 | obs, reward, done, info = super().step(action) 37 | 38 | if action == self.actions.pickup: 39 | if self.carrying and self.carrying == self.obj: 40 | reward = self._reward() 41 | done = True 42 | 43 | return obs, reward, done, info 44 | 45 | register( 46 | id='MiniGrid-UnlockPickup-v0', 47 | entry_point='gym_minigrid.envs:UnlockPickup' 48 | ) 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Maxime Chevalier-Boisvert 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /gym_minigrid/envs/blockedunlockpickup.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import Ball 2 | from gym_minigrid.roomgrid import RoomGrid 3 | from gym_minigrid.register import register 4 | 5 | class BlockedUnlockPickup(RoomGrid): 6 | """ 7 | Unlock a door blocked by a ball, then pick up a box 8 | in another room 9 | """ 10 | 11 | def __init__(self, seed=None): 12 | room_size = 6 13 | super().__init__( 14 | num_rows=1, 15 | num_cols=2, 16 | room_size=room_size, 17 | max_steps=16*room_size**2, 18 | seed=seed 19 | ) 20 | 21 | def _gen_grid(self, width, height): 22 | super()._gen_grid(width, height) 23 | 24 | # Add a box to the room on the right 25 | obj, _ = self.add_object(1, 0, kind="box") 26 | # Make sure the two rooms are directly connected by a locked door 27 | door, pos = self.add_door(0, 0, 0, locked=True) 28 | # Block the door with a ball 29 | color = self._rand_color() 30 | self.grid.set(pos[0]-1, pos[1], Ball(color)) 31 | # Add a key to unlock the door 32 | self.add_object(0, 0, 'key', door.color) 33 | 34 | self.place_agent(0, 0) 35 | 36 | self.obj = obj 37 | self.mission = "pick up the %s %s" % (obj.color, obj.type) 38 | 39 | def step(self, action): 40 | obs, reward, done, info = super().step(action) 41 | 42 | if action == self.actions.pickup: 43 | if 
self.carrying and self.carrying == self.obj: 44 | reward = self._reward() 45 | done = True 46 | 47 | return obs, reward, done, info 48 | 49 | register( 50 | id='MiniGrid-BlockedUnlockPickup-v0', 51 | entry_point='gym_minigrid.envs:BlockedUnlockPickup' 52 | ) 53 | -------------------------------------------------------------------------------- /gym_minigrid/envs/distshift.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class DistShiftEnv(MiniGridEnv): 5 | """ 6 | Distributional shift environment. 7 | """ 8 | 9 | def __init__( 10 | self, 11 | width=9, 12 | height=7, 13 | agent_start_pos=(1,1), 14 | agent_start_dir=0, 15 | strip2_row=2 16 | ): 17 | self.agent_start_pos = agent_start_pos 18 | self.agent_start_dir = agent_start_dir 19 | self.goal_pos = (width-2, 1) 20 | self.strip2_row = strip2_row 21 | 22 | super().__init__( 23 | width=width, 24 | height=height, 25 | max_steps=4*width*height, 26 | # Set this to True for maximum speed 27 | see_through_walls=True 28 | ) 29 | 30 | def _gen_grid(self, width, height): 31 | # Create an empty grid 32 | self.grid = Grid(width, height) 33 | 34 | # Generate the surrounding walls 35 | self.grid.wall_rect(0, 0, width, height) 36 | 37 | # Place a goal square in the top-right corner 38 | self.grid.set(*self.goal_pos, Goal()) 39 | 40 | # Place the lava rows 41 | for i in range(self.width - 6): 42 | self.grid.set(3+i, 1, Lava()) 43 | self.grid.set(3+i, self.strip2_row, Lava()) 44 | 45 | # Place the agent 46 | if self.agent_start_pos is not None: 47 | self.agent_pos = self.agent_start_pos 48 | self.agent_dir = self.agent_start_dir 49 | else: 50 | self.place_agent() 51 | 52 | self.mission = "get to the green goal square" 53 | 54 | class DistShift1(DistShiftEnv): 55 | def __init__(self): 56 | super().__init__(strip2_row=2) 57 | 58 | class DistShift2(DistShiftEnv): 59 | def __init__(self): 60 | 
super().__init__(strip2_row=5) 61 | 62 | register( 63 | id='MiniGrid-DistShift1-v0', 64 | entry_point='gym_minigrid.envs:DistShift1' 65 | ) 66 | 67 | register( 68 | id='MiniGrid-DistShift2-v0', 69 | entry_point='gym_minigrid.envs:DistShift2' 70 | ) 71 | -------------------------------------------------------------------------------- /manual_control.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from __future__ import division, print_function 4 | 5 | import sys 6 | import numpy 7 | import gym 8 | import time 9 | from optparse import OptionParser 10 | 11 | import gym_minigrid 12 | 13 | def main(): 14 | parser = OptionParser() 15 | parser.add_option( 16 | "-e", 17 | "--env-name", 18 | dest="env_name", 19 | help="gym environment to load", 20 | default='MiniGrid-MultiRoom-N6-v0' 21 | ) 22 | (options, args) = parser.parse_args() 23 | 24 | # Load the gym environment 25 | env = gym.make(options.env_name) 26 | 27 | def resetEnv(): 28 | env.reset() 29 | if hasattr(env, 'mission'): 30 | print('Mission: %s' % env.mission) 31 | 32 | resetEnv() 33 | 34 | # Create a window to render into 35 | renderer = env.render('human') 36 | 37 | def keyDownCb(keyName): 38 | if keyName == 'BACKSPACE': 39 | resetEnv() 40 | return 41 | 42 | if keyName == 'ESCAPE': 43 | sys.exit(0) 44 | 45 | action = 0 46 | 47 | if keyName == 'LEFT': 48 | action = env.actions.left 49 | elif keyName == 'RIGHT': 50 | action = env.actions.right 51 | elif keyName == 'UP': 52 | action = env.actions.forward 53 | 54 | elif keyName == 'SPACE': 55 | action = env.actions.toggle 56 | elif keyName == 'PAGE_UP': 57 | action = env.actions.pickup 58 | elif keyName == 'PAGE_DOWN': 59 | action = env.actions.drop 60 | 61 | elif keyName == 'RETURN': 62 | action = env.actions.done 63 | 64 | else: 65 | print("unknown key %s" % keyName) 66 | return 67 | 68 | obs, reward, done, info = env.step(action) 69 | 70 | print('step=%s, reward=%.2f' % (env.step_count, 
reward)) 71 | 72 | if done: 73 | print('done!') 74 | resetEnv() 75 | 76 | renderer.window.setKeyDownCb(keyDownCb) 77 | 78 | while True: 79 | env.render('human') 80 | time.sleep(0.01) 81 | 82 | # If the window was closed 83 | if renderer.window == None: 84 | break 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /gym_minigrid/envs/doorkey.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class DoorKeyEnv(MiniGridEnv): 5 | """ 6 | Environment with a door and key, sparse reward 7 | """ 8 | 9 | def __init__(self, size=8): 10 | super().__init__( 11 | grid_size=size, 12 | max_steps=10*size*size 13 | ) 14 | 15 | def _gen_grid(self, width, height): 16 | # Create an empty grid 17 | self.grid = Grid(width, height) 18 | 19 | # Generate the surrounding walls 20 | self.grid.wall_rect(0, 0, width, height) 21 | 22 | # Place a goal in the bottom-right corner 23 | self.grid.set(width - 2, height - 2, Goal()) 24 | 25 | # Create a vertical splitting wall 26 | splitIdx = self._rand_int(2, width-2) 27 | self.grid.vert_wall(splitIdx, 0) 28 | 29 | # Place the agent at a random position and orientation 30 | # on the left side of the splitting wall 31 | self.place_agent(size=(splitIdx, height)) 32 | 33 | # Place a door in the wall 34 | doorIdx = self._rand_int(1, width-2) 35 | self.grid.set(splitIdx, doorIdx, Door('yellow', is_locked=True)) 36 | 37 | # Place a yellow key on the left side 38 | self.place_obj( 39 | obj=Key('yellow'), 40 | top=(0, 0), 41 | size=(splitIdx, height) 42 | ) 43 | 44 | self.mission = "use the key to open the door and then get to the goal" 45 | 46 | class DoorKeyEnv5x5(DoorKeyEnv): 47 | def __init__(self): 48 | super().__init__(size=5) 49 | 50 | class DoorKeyEnv6x6(DoorKeyEnv): 51 | def __init__(self): 52 | super().__init__(size=6) 53 | 54 | class 
DoorKeyEnv16x16(DoorKeyEnv): 55 | def __init__(self): 56 | super().__init__(size=16) 57 | 58 | register( 59 | id='MiniGrid-DoorKey-5x5-v0', 60 | entry_point='gym_minigrid.envs:DoorKeyEnv5x5' 61 | ) 62 | 63 | register( 64 | id='MiniGrid-DoorKey-6x6-v0', 65 | entry_point='gym_minigrid.envs:DoorKeyEnv6x6' 66 | ) 67 | 68 | register( 69 | id='MiniGrid-DoorKey-8x8-v0', 70 | entry_point='gym_minigrid.envs:DoorKeyEnv' 71 | ) 72 | 73 | register( 74 | id='MiniGrid-DoorKey-16x16-v0', 75 | entry_point='gym_minigrid.envs:DoorKeyEnv16x16' 76 | ) 77 | -------------------------------------------------------------------------------- /gym_minigrid/envs/empty.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class EmptyEnv(MiniGridEnv): 5 | """ 6 | Empty grid environment, no obstacles, sparse reward 7 | """ 8 | 9 | def __init__( 10 | self, 11 | size=8, 12 | agent_start_pos=(1,1), 13 | agent_start_dir=0, 14 | ): 15 | self.agent_start_pos = agent_start_pos 16 | self.agent_start_dir = agent_start_dir 17 | 18 | super().__init__( 19 | grid_size=size, 20 | max_steps=4*size*size, 21 | # Set this to True for maximum speed 22 | see_through_walls=True 23 | ) 24 | 25 | def _gen_grid(self, width, height): 26 | # Create an empty grid 27 | self.grid = Grid(width, height) 28 | 29 | # Generate the surrounding walls 30 | self.grid.wall_rect(0, 0, width, height) 31 | 32 | # Place a goal square in the bottom-right corner 33 | self.grid.set(width - 2, height - 2, Goal()) 34 | 35 | # Place the agent 36 | if self.agent_start_pos is not None: 37 | self.agent_pos = self.agent_start_pos 38 | self.agent_dir = self.agent_start_dir 39 | else: 40 | self.place_agent() 41 | 42 | self.mission = "get to the green goal square" 43 | 44 | class EmptyEnv5x5(EmptyEnv): 45 | def __init__(self): 46 | super().__init__(size=5) 47 | 48 | class EmptyRandomEnv5x5(EmptyEnv): 49 | def __init__(self): 
50 | super().__init__(size=5, agent_start_pos=None) 51 | 52 | class EmptyEnv6x6(EmptyEnv): 53 | def __init__(self): 54 | super().__init__(size=6) 55 | 56 | class EmptyRandomEnv6x6(EmptyEnv): 57 | def __init__(self): 58 | super().__init__(size=6, agent_start_pos=None) 59 | 60 | class EmptyEnv16x16(EmptyEnv): 61 | def __init__(self): 62 | super().__init__(size=16) 63 | 64 | register( 65 | id='MiniGrid-Empty-5x5-v0', 66 | entry_point='gym_minigrid.envs:EmptyEnv5x5' 67 | ) 68 | 69 | register( 70 | id='MiniGrid-Empty-Random-5x5-v0', 71 | entry_point='gym_minigrid.envs:EmptyRandomEnv5x5' 72 | ) 73 | 74 | register( 75 | id='MiniGrid-Empty-6x6-v0', 76 | entry_point='gym_minigrid.envs:EmptyEnv6x6' 77 | ) 78 | 79 | register( 80 | id='MiniGrid-Empty-Random-6x6-v0', 81 | entry_point='gym_minigrid.envs:EmptyRandomEnv6x6' 82 | ) 83 | 84 | register( 85 | id='MiniGrid-Empty-8x8-v0', 86 | entry_point='gym_minigrid.envs:EmptyEnv' 87 | ) 88 | 89 | register( 90 | id='MiniGrid-Empty-16x16-v0', 91 | entry_point='gym_minigrid.envs:EmptyEnv16x16' 92 | ) 93 | -------------------------------------------------------------------------------- /gym_minigrid/envs/playground_v0.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class PlaygroundV0(MiniGridEnv): 5 | """ 6 | Environment with multiple rooms and random objects. 7 | This environment has no specific goals or rewards. 
8 | """ 9 | 10 | def __init__(self): 11 | super().__init__(grid_size=19, max_steps=100) 12 | 13 | def _gen_grid(self, width, height): 14 | # Create the grid 15 | self.grid = Grid(width, height) 16 | 17 | # Generate the surrounding walls 18 | self.grid.horz_wall(0, 0) 19 | self.grid.horz_wall(0, height-1) 20 | self.grid.vert_wall(0, 0) 21 | self.grid.vert_wall(width-1, 0) 22 | 23 | roomW = width // 3 24 | roomH = height // 3 25 | 26 | # For each row of rooms 27 | for j in range(0, 3): 28 | 29 | # For each column 30 | for i in range(0, 3): 31 | xL = i * roomW 32 | yT = j * roomH 33 | xR = xL + roomW 34 | yB = yT + roomH 35 | 36 | # Right wall and door 37 | if i+1 < 3: 38 | self.grid.vert_wall(xR, yT, roomH) 39 | pos = (xR, self._rand_int(yT+1, yB-1)) 40 | color = self._rand_elem(COLOR_NAMES) 41 | self.grid.set(*pos, Door(color)) 42 | 43 | # Bottom wall and door 44 | if j+1 < 3: 45 | self.grid.horz_wall(xL, yB, roomW) 46 | pos = (self._rand_int(xL+1, xR-1), yB) 47 | color = self._rand_elem(COLOR_NAMES) 48 | self.grid.set(*pos, Door(color)) 49 | 50 | # Randomize the player start position and orientation 51 | self.place_agent() 52 | 53 | # Place random objects in the world 54 | types = ['key', 'ball', 'box'] 55 | for i in range(0, 12): 56 | objType = self._rand_elem(types) 57 | objColor = self._rand_elem(COLOR_NAMES) 58 | if objType == 'key': 59 | obj = Key(objColor) 60 | elif objType == 'ball': 61 | obj = Ball(objColor) 62 | elif objType == 'box': 63 | obj = Box(objColor) 64 | self.place_obj(obj) 65 | 66 | # No explicit mission in this environment 67 | self.mission = '' 68 | 69 | def step(self, action): 70 | obs, reward, done, info = MiniGridEnv.step(self, action) 71 | return obs, reward, done, info 72 | 73 | register( 74 | id='MiniGrid-Playground-v0', 75 | entry_point='gym_minigrid.envs:PlaygroundV0' 76 | ) 77 | -------------------------------------------------------------------------------- /gym_minigrid/envs/redbluedoors.py: 
-------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class RedBlueDoorEnv(MiniGridEnv): 5 | """ 6 | Single room with red and blue doors on opposite sides. 7 | The red door must be opened before the blue door to 8 | obtain a reward. 9 | """ 10 | 11 | def __init__(self, size=8): 12 | self.size = size 13 | 14 | super().__init__( 15 | width=2*size, 16 | height=size, 17 | max_steps=20*size*size 18 | ) 19 | 20 | def _gen_grid(self, width, height): 21 | # Create an empty grid 22 | self.grid = Grid(width, height) 23 | 24 | # Generate the grid walls 25 | self.grid.wall_rect(0, 0, 2*self.size, self.size) 26 | self.grid.wall_rect(self.size//2, 0, self.size, self.size) 27 | 28 | # Place the agent in the top-left corner 29 | self.place_agent(top=(self.size//2, 0), size=(self.size, self.size)) 30 | 31 | # Add a red door at a random position in the left wall 32 | pos = self._rand_int(1, self.size - 1) 33 | self.red_door = Door("red") 34 | self.grid.set(self.size//2, pos, self.red_door) 35 | 36 | # Add a blue door at a random position in the right wall 37 | pos = self._rand_int(1, self.size - 1) 38 | self.blue_door = Door("blue") 39 | self.grid.set(self.size//2 + self.size - 1, pos, self.blue_door) 40 | 41 | # Generate the mission string 42 | self.mission = "open the red door then the blue door" 43 | 44 | def step(self, action): 45 | red_door_opened_before = self.red_door.is_open 46 | blue_door_opened_before = self.blue_door.is_open 47 | 48 | obs, reward, done, info = MiniGridEnv.step(self, action) 49 | 50 | red_door_opened_after = self.red_door.is_open 51 | blue_door_opened_after = self.blue_door.is_open 52 | 53 | if blue_door_opened_after: 54 | if red_door_opened_before: 55 | reward = self._reward() 56 | done = True 57 | else: 58 | reward = 0 59 | done = True 60 | 61 | elif red_door_opened_after: 62 | if blue_door_opened_before: 63 | reward = 0 64 | done = 
class FourRoomsEnv(MiniGridEnv):
    """
    Classic four-rooms gridworld environment.

    The agent start position and the goal position can each be fixed
    through the constructor. Whichever is left as None is chosen at
    random each time the grid is generated.
    """

    def __init__(self, agent_pos=None, goal_pos=None):
        # Remember the requested positions; _gen_grid reads them.
        # They must be stored before the base constructor runs.
        self._agent_default_pos = agent_pos
        self._goal_default_pos = goal_pos
        super().__init__(grid_size=19, max_steps=100)

    def _gen_grid(self, width, height):
        """
        Build the four rooms, open one gap in each inner wall segment,
        then place the agent and the goal.
        """
        # Create the grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        room_w = width // 2
        room_h = height // 2

        # For each row of rooms
        for j in range(0, 2):

            # For each column
            for i in range(0, 2):
                xL = i * room_w
                yT = j * room_h
                xR = xL + room_w
                yB = yT + room_h

                # Right wall with a single opening (left column only)
                if i + 1 < 2:
                    self.grid.vert_wall(xR, yT, room_h)
                    pos = (xR, self._rand_int(yT + 1, yB))
                    self.grid.set(*pos, None)

                # Bottom wall with a single opening (top row only)
                if j + 1 < 2:
                    self.grid.horz_wall(xL, yB, room_w)
                    pos = (self._rand_int(xL + 1, xR), yB)
                    self.grid.set(*pos, None)

        # Use the requested agent position if given, otherwise randomize
        # the player start position and orientation
        if self._agent_default_pos is not None:
            self.agent_pos = self._agent_default_pos
            # Make sure the start cell is empty
            self.grid.set(*self._agent_default_pos, None)
            self.agent_dir = self._rand_int(0, 4)  # assuming random start direction
        else:
            self.place_agent()

        if self._goal_default_pos is not None:
            goal = Goal()
            self.grid.set(*self._goal_default_pos, goal)
            # Both attributes must hold the full (x, y) position.
            # Tuple-unpacking here would store x in init_pos and y in cur_pos.
            goal.init_pos = goal.cur_pos = self._goal_default_pos
        else:
            self.place_obj(Goal())

        self.mission = 'Reach the goal'

    def step(self, action):
        # No environment-specific logic: defer to the base implementation
        obs, reward, done, info = MiniGridEnv.step(self, action)
        return obs, reward, done, info
class FetchEnv(MiniGridEnv):
    """
    Fetching task: keys and balls of random colors are scattered in a
    single room and the mission text names the object to pick up.
    """

    def __init__(
        self,
        size=8,
        numObjs=3
    ):
        self.numObjs = numObjs

        super().__init__(
            grid_size=size,
            max_steps=5*size**2,
            # Set this to True for maximum speed
            see_through_walls=True
        )

    def _gen_grid(self, width, height):
        """Create the room, the objects, the agent and the mission string."""
        self.grid = Grid(width, height)

        # Outer boundary walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height-1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width-1, 0)

        constructors = {'key': Key, 'ball': Ball}
        kinds = ['key', 'ball']

        # Keep generating until the requested number of objects exists;
        # the type is drawn before the color
        placed = []
        while len(placed) < self.numObjs:
            kind = self._rand_elem(kinds)
            tint = self._rand_elem(COLOR_NAMES)
            item = constructors[kind](tint)
            self.place_obj(item)
            placed.append(item)

        # Randomize the player start position and orientation
        self.place_agent()

        # Pick the object the agent is asked to fetch
        wanted = placed[self._rand_int(0, len(placed))]
        self.targetType = wanted.type
        self.targetColor = wanted.color

        description = '%s %s' % (self.targetColor, self.targetType)

        # One of five equivalent phrasings, selected at random
        templates = (
            'get a %s',
            'go get a %s',
            'fetch a %s',
            'go fetch a %s',
            'you must fetch a %s',
        )
        self.mission = templates[self._rand_int(0, 5)] % description
        assert hasattr(self, 'mission')

    def step(self, action):
        obs, reward, done, info = MiniGridEnv.step(self, action)

        held = self.carrying
        if held:
            # Picking up anything ends the episode; only the target pays
            done = True
            if held.color == self.targetColor and \
               held.type == self.targetType:
                reward = self._reward()
            else:
                reward = 0

        return obs, reward, done, info
doorColors = [] 43 | while len(doorColors) < len(doorPos): 44 | color = self._rand_elem(COLOR_NAMES) 45 | if color in doorColors: 46 | continue 47 | doorColors.append(color) 48 | 49 | # Place the doors in the grid 50 | for idx, pos in enumerate(doorPos): 51 | color = doorColors[idx] 52 | self.grid.set(*pos, Door(color)) 53 | 54 | # Randomize the agent start position and orientation 55 | self.place_agent(size=(width, height)) 56 | 57 | # Select a random target door 58 | doorIdx = self._rand_int(0, len(doorPos)) 59 | self.target_pos = doorPos[doorIdx] 60 | self.target_color = doorColors[doorIdx] 61 | 62 | # Generate the mission string 63 | self.mission = 'go to the %s door' % self.target_color 64 | 65 | def step(self, action): 66 | obs, reward, done, info = super().step(action) 67 | 68 | ax, ay = self.agent_pos 69 | tx, ty = self.target_pos 70 | 71 | # Don't let the agent open any of the doors 72 | if action == self.actions.toggle: 73 | done = True 74 | 75 | # Reward performing done action in front of the target door 76 | if action == self.actions.done: 77 | if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1): 78 | reward = self._reward() 79 | done = True 80 | 81 | return obs, reward, done, info 82 | 83 | class GoToDoor8x8Env(GoToDoorEnv): 84 | def __init__(self): 85 | super().__init__(size=8) 86 | 87 | class GoToDoor6x6Env(GoToDoorEnv): 88 | def __init__(self): 89 | super().__init__(size=6) 90 | 91 | register( 92 | id='MiniGrid-GoToDoor-5x5-v0', 93 | entry_point='gym_minigrid.envs:GoToDoorEnv' 94 | ) 95 | 96 | register( 97 | id='MiniGrid-GoToDoor-6x6-v0', 98 | entry_point='gym_minigrid.envs:GoToDoor6x6Env' 99 | ) 100 | 101 | register( 102 | id='MiniGrid-GoToDoor-8x8-v0', 103 | entry_point='gym_minigrid.envs:GoToDoor8x8Env' 104 | ) 105 | -------------------------------------------------------------------------------- /run_tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | 
import random
import numpy as np
import gym
from gym_minigrid.register import env_list
from gym_minigrid.minigrid import Grid, OBJECT_TO_IDX

# Test specifically importing a specific environment
from gym_minigrid.envs import DoorKeyEnv

# Test importing wrappers
from gym_minigrid.wrappers import *

##############################################################################

print('%d environments registered' % len(env_list))

for env_name in env_list:
    print('testing "%s"' % env_name)

    # Load the gym environment
    env = gym.make(env_name)
    # Cap the episode length so that random play terminates quickly
    env.max_steps = min(env.max_steps, 200)
    env.reset()
    env.render('rgb_array')

    # Verify that the same seed always produces the same environment.
    # The grid has to be regenerated after each seeding; reading env.grid
    # twice without a reset would compare a grid object with itself.
    # NOTE(review): relies on Grid implementing value equality - confirm
    for i in range(0, 5):
        seed = 1337 + i
        env.seed(seed)
        env.reset()
        grid1 = env.grid
        env.seed(seed)
        env.reset()
        grid2 = env.grid
        assert grid1 == grid2

    env.reset()

    # Run for a few episodes
    num_episodes = 0
    while num_episodes < 5:
        # Pick a random action
        action = random.randint(0, env.action_space.n - 1)

        obs, reward, done, info = env.step(action)

        # Validate the agent position
        assert env.agent_pos[0] < env.width
        assert env.agent_pos[1] < env.height

        # Test observation encode/decode roundtrip
        img = obs['image']
        vis_mask = img[:, :, 0] != OBJECT_TO_IDX['unseen']  # hackish
        img2 = Grid.decode(img).encode(vis_mask=vis_mask)
        assert np.array_equal(img, img2)

        # Test the env to string function
        str(env)

        # Check that the reward is within the specified range
        assert reward >= env.reward_range[0], reward
        assert reward <= env.reward_range[1], reward

        if done:
            num_episodes += 1
            env.reset()

        env.render('rgb_array')

    # Test the close method
    env.close()

    # Smoke-test each wrapper: build, reset, take one step, close
    env = gym.make(env_name)
    env = ReseedWrapper(env)
    for _ in range(10):
        env.reset()
        env.step(0)
        env.close()

    env = gym.make(env_name)
    env = ImgObsWrapper(env)
    env.reset()
    env.step(0)
    env.close()

    # Test the fully observable wrapper
    env = gym.make(env_name)
    env = FullyObsWrapper(env)
    env.reset()
    obs, _, _, _ = env.step(0)
    assert obs.shape == env.observation_space.shape
    env.close()

    env = gym.make(env_name)
    env = FlatObsWrapper(env)
    env.reset()
    env.step(0)
    env.close()

    env = gym.make(env_name)
    env = AgentViewWrapper(env, 5)
    env.reset()
    env.step(0)
    env.close()

##############################################################################

print('testing agent_sees method')
env = gym.make('MiniGrid-DoorKey-6x6-v0')
goal_pos = (env.grid.width - 2, env.grid.height - 2)

# Test the "in" operator on grid objects
assert ('green', 'goal') in env.grid
assert ('blue', 'key') not in env.grid

# Test the env.agent_sees() function: it must agree with whether the goal
# appears in the decoded observation image
env.reset()
for i in range(0, 500):
    action = random.randint(0, env.action_space.n - 1)
    obs, reward, done, info = env.step(action)
    goal_visible = ('green', 'goal') in Grid.decode(obs['image'])
    agent_sees_goal = env.agent_sees(*goal_pos)
    assert agent_sees_goal == goal_visible
    if done:
        env.reset()

#############################################################################
8 | """ 9 | 10 | def __init__( 11 | self, 12 | num_rows=3, 13 | obj_type="ball", 14 | room_size=6, 15 | seed=None 16 | ): 17 | self.obj_type = obj_type 18 | 19 | super().__init__( 20 | room_size=room_size, 21 | num_rows=num_rows, 22 | max_steps=30*room_size**2, 23 | seed=seed, 24 | ) 25 | 26 | def _gen_grid(self, width, height): 27 | super()._gen_grid(width, height) 28 | 29 | # Connect the middle column rooms into a hallway 30 | for j in range(1, self.num_rows): 31 | self.remove_wall(1, j, 3) 32 | 33 | # Add a locked door on the bottom right 34 | # Add an object behind the locked door 35 | room_idx = self._rand_int(0, self.num_rows) 36 | door, _ = self.add_door(2, room_idx, 2, locked=True) 37 | obj, _ = self.add_object(2, room_idx, kind=self.obj_type) 38 | 39 | # Add a key in a random room on the left side 40 | self.add_object(0, self._rand_int(0, self.num_rows), 'key', door.color) 41 | 42 | # Place the agent in the middle 43 | self.place_agent(1, self.num_rows // 2) 44 | 45 | # Make sure all rooms are accessible 46 | self.connect_all() 47 | 48 | self.obj = obj 49 | self.mission = "pick up the %s %s" % (obj.color, obj.type) 50 | 51 | def step(self, action): 52 | obs, reward, done, info = super().step(action) 53 | 54 | if action == self.actions.pickup: 55 | if self.carrying and self.carrying == self.obj: 56 | reward = self._reward() 57 | done = True 58 | 59 | return obs, reward, done, info 60 | 61 | class KeyCorridorS3R1(KeyCorridor): 62 | def __init__(self, seed=None): 63 | super().__init__( 64 | room_size=3, 65 | num_rows=1, 66 | seed=seed 67 | ) 68 | 69 | class KeyCorridorS3R2(KeyCorridor): 70 | def __init__(self, seed=None): 71 | super().__init__( 72 | room_size=3, 73 | num_rows=2, 74 | seed=seed 75 | ) 76 | 77 | class KeyCorridorS3R3(KeyCorridor): 78 | def __init__(self, seed=None): 79 | super().__init__( 80 | room_size=3, 81 | num_rows=3, 82 | seed=seed 83 | ) 84 | 85 | class KeyCorridorS4R3(KeyCorridor): 86 | def __init__(self, seed=None): 87 | 
class LockedRoom(MiniGridEnv):
    """
    Six rooms, three on each side of a central hallway, each closed by a
    door of a distinct color. One room is locked and holds the goal; the
    matching key lies in one of the other rooms. The agent starts in the
    hallway area.
    """

    def __init__(
        self,
        size=19
    ):
        super().__init__(grid_size=size, max_steps=10*size)

    def _gen_grid(self, width, height):
        """Build the walls, rooms, doors, goal, key, agent and mission."""
        grid = Grid(width, height)
        self.grid = grid

        # Outer boundary walls
        for x in range(width):
            grid.set(x, 0, Wall())
            grid.set(x, height-1, Wall())
        for y in range(height):
            grid.set(0, y, Wall())
            grid.set(width-1, y, Wall())

        # The two full-height walls that delimit the hallway
        left_wall_x = width // 2 - 2
        right_wall_x = width // 2 + 2
        for y in range(height):
            grid.set(left_wall_x, y, Wall())
            grid.set(right_wall_x, y, Wall())

        self.rooms = []

        room_w = left_wall_x + 1
        room_h = height // 3 + 1

        # Horizontal walls splitting each side into three rooms; for every
        # band the left room is registered before the right one
        for band in range(3):
            wall_y = band * (height // 3)
            for x in range(left_wall_x):
                grid.set(x, wall_y, Wall())
            for x in range(right_wall_x, width):
                grid.set(x, wall_y, Wall())

            door_y = wall_y + 3
            self.rooms.append(Room(
                (0, wall_y),
                (room_w, room_h),
                (left_wall_x, door_y)
            ))
            self.rooms.append(Room(
                (right_wall_x, wall_y),
                (room_w, room_h),
                (right_wall_x, door_y)
            ))

        # The locked room is drawn first and receives the goal
        locked_room = self._rand_elem(self.rooms)
        locked_room.locked = True
        goal_pos = locked_room.rand_pos(self)
        grid.set(*goal_pos, Goal())

        # Give every room a distinct door color, always drawing from the
        # alphabetically sorted pool of colors not used yet
        available = sorted(set(COLOR_NAMES))
        for room in self.rooms:
            tint = self._rand_elem(available)
            available.remove(tint)
            room.color = tint
            if room.locked:
                door = Door(tint, is_locked=True)
            else:
                door = Door(tint)
            grid.set(*room.doorPos, door)

        # Redraw until the key room differs from the locked room
        key_room = self._rand_elem(self.rooms)
        while key_room == locked_room:
            key_room = self._rand_elem(self.rooms)
        key_pos = key_room.rand_pos(self)
        grid.set(*key_pos, Key(locked_room.color))

        # Randomize the player start position and orientation inside the
        # region spanned by the hallway walls
        self.agent_pos = self.place_agent(
            top=(left_wall_x, 0),
            size=(right_wall_x-left_wall_x, height)
        )

        # Generate the mission string
        self.mission = (
            'get the %s key from the %s room, '
            'unlock the %s door and '
            'go to the goal'
        ) % (locked_room.color, key_room.color, locked_room.color)

    def step(self, action):
        # Nothing environment-specific: forward to the base implementation
        observation, reward, finished, extra = MiniGridEnv.step(self, action)
        return observation, reward, finished, extra
8 | """ 9 | 10 | def __init__( 11 | self, 12 | size=6, 13 | numObjs=2 14 | ): 15 | self.numObjs = numObjs 16 | 17 | super().__init__( 18 | grid_size=size, 19 | max_steps=5*size, 20 | # Set this to True for maximum speed 21 | see_through_walls=True 22 | ) 23 | 24 | def _gen_grid(self, width, height): 25 | self.grid = Grid(width, height) 26 | 27 | # Generate the surrounding walls 28 | self.grid.horz_wall(0, 0) 29 | self.grid.horz_wall(0, height-1) 30 | self.grid.vert_wall(0, 0) 31 | self.grid.vert_wall(width-1, 0) 32 | 33 | # Types and colors of objects we can generate 34 | types = ['key', 'ball', 'box'] 35 | 36 | objs = [] 37 | objPos = [] 38 | 39 | def near_obj(env, p1): 40 | for p2 in objPos: 41 | dx = p1[0] - p2[0] 42 | dy = p1[1] - p2[1] 43 | if abs(dx) <= 1 and abs(dy) <= 1: 44 | return True 45 | return False 46 | 47 | # Until we have generated all the objects 48 | while len(objs) < self.numObjs: 49 | objType = self._rand_elem(types) 50 | objColor = self._rand_elem(COLOR_NAMES) 51 | 52 | # If this object already exists, try again 53 | if (objType, objColor) in objs: 54 | continue 55 | 56 | if objType == 'key': 57 | obj = Key(objColor) 58 | elif objType == 'ball': 59 | obj = Ball(objColor) 60 | elif objType == 'box': 61 | obj = Box(objColor) 62 | 63 | pos = self.place_obj(obj, reject_fn=near_obj) 64 | 65 | objs.append((objType, objColor)) 66 | objPos.append(pos) 67 | 68 | # Randomize the agent start position and orientation 69 | self.place_agent() 70 | 71 | # Choose a random object to be moved 72 | objIdx = self._rand_int(0, len(objs)) 73 | self.move_type, self.moveColor = objs[objIdx] 74 | self.move_pos = objPos[objIdx] 75 | 76 | # Choose a target object (to put the first object next to) 77 | while True: 78 | targetIdx = self._rand_int(0, len(objs)) 79 | if targetIdx != objIdx: 80 | break 81 | self.target_type, self.target_color = objs[targetIdx] 82 | self.target_pos = objPos[targetIdx] 83 | 84 | self.mission = 'put the %s %s near the %s %s' % ( 85 | 
class DynamicObstaclesEnv(MiniGridEnv):
    """
    Single-room square grid environment with moving obstacles.

    The agent has to reach the goal square in the bottom-right corner.
    Trying to move forward into an occupied cell other than the goal
    ends the episode with a reward of -1.
    """

    def __init__(
            self,
            size=8,
            agent_start_pos=(1, 1),
            agent_start_dir=0,
            n_obstacles=4
    ):
        """
        :param size: side length of the square grid
        :param agent_start_pos: fixed start cell of the agent, or None to
            place the agent at random
        :param agent_start_dir: start direction used with a fixed start cell
        :param n_obstacles: requested number of obstacles; values above
            size/2 + 1 are reduced to int(size/2)
        """
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        # Reduce obstacles if there are too many
        if n_obstacles <= size/2 + 1:
            self.n_obstacles = int(n_obstacles)
        else:
            self.n_obstacles = int(size/2)
        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=True,
        )
        # Only three actions are permitted: left, right, forward
        self.action_space = spaces.Discrete(self.actions.forward + 1)
        self.reward_range = (-1, 1)

    def _gen_grid(self, width, height):
        """Create the room, the goal, the agent and the obstacles."""
        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place a goal square in the bottom-right corner
        self.grid.set(width - 2, height - 2, Goal())

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()

        # Place obstacles
        self.obstacles = []
        for i_obst in range(self.n_obstacles):
            self.obstacles.append(Ball())
            self.place_obj(self.obstacles[i_obst], max_tries=100)

        self.mission = "get to the green goal square"

    def step(self, action):
        """
        Apply the action, penalize collisions, then move every obstacle.

        Returns the usual (obs, reward, done, info) tuple.
        """
        # Invalid action
        if action >= self.action_space.n:
            action = 0

        # Check if there is an obstacle in front of the agent.
        # This must be evaluated before the base step moves the agent.
        front_cell = self.grid.get(*self.front_pos)
        not_clear = front_cell and front_cell.type != 'goal'

        obs, reward, done, info = MiniGridEnv.step(self, action)

        # If the agent tries to walk over an obstacle
        if action == self.actions.forward and not_clear:
            reward = -1
            done = True
            return obs, reward, done, info

        # Update obstacle positions: each one is re-placed inside the 3x3
        # window centered on its current cell
        for i_obst in range(len(self.obstacles)):
            old_pos = self.obstacles[i_obst].cur_pos
            top = tuple(map(add, old_pos, (-1, -1)))

            try:
                self.place_obj(self.obstacles[i_obst], top=top, size=(3,3), max_tries=100)
                self.grid.set(*old_pos, None)
            except Exception:
                # Best effort: if the obstacle could not be re-placed it
                # simply stays where it is. Exception (rather than a bare
                # except) lets KeyboardInterrupt and SystemExit propagate.
                pass

        return obs, reward, done, info
The agent starts in a small room 7 | where it sees an object. It then has to go through a narrow hallway 8 | which ends in a split. At each end of the split there is an object, 9 | one of which is the same as the object in the starting room. The 10 | agent has to remember the initial object, and go to the matching 11 | object at split. 12 | """ 13 | 14 | def __init__( 15 | self, 16 | seed, 17 | size=8, 18 | random_length=False, 19 | ): 20 | self.random_length = random_length 21 | super().__init__( 22 | seed=seed, 23 | grid_size=size, 24 | max_steps=5*size**2, 25 | # Set this to True for maximum speed 26 | see_through_walls=False, 27 | ) 28 | 29 | def _gen_grid(self, width, height): 30 | self.grid = Grid(width, height) 31 | 32 | # Generate the surrounding walls 33 | self.grid.horz_wall(0, 0) 34 | self.grid.horz_wall(0, height-1) 35 | self.grid.vert_wall(0, 0) 36 | self.grid.vert_wall(width - 1, 0) 37 | 38 | assert height % 2 == 1 39 | upper_room_wall = height // 2 - 2 40 | lower_room_wall = height // 2 + 2 41 | if self.random_length: 42 | hallway_end = self._rand_int(4, width - 2) 43 | else: 44 | hallway_end = width - 3 45 | 46 | # Start room 47 | for i in range(1, 5): 48 | self.grid.set(i, upper_room_wall, Wall()) 49 | self.grid.set(i, lower_room_wall, Wall()) 50 | self.grid.set(4, upper_room_wall + 1, Wall()) 51 | self.grid.set(4, lower_room_wall - 1, Wall()) 52 | 53 | # Horizontal hallway 54 | for i in range(5, hallway_end): 55 | self.grid.set(i, upper_room_wall + 1, Wall()) 56 | self.grid.set(i, lower_room_wall - 1, Wall()) 57 | 58 | # Vertical hallway 59 | for j in range(0, height): 60 | if j != height // 2: 61 | self.grid.set(hallway_end, j, Wall()) 62 | self.grid.set(hallway_end + 2, j, Wall()) 63 | 64 | # Fix the player's start position and orientation 65 | self.agent_pos = (self._rand_int(1, hallway_end + 1), height // 2) 66 | self.agent_dir = 0 67 | 68 | # Place objects 69 | start_room_obj = self._rand_elem([Key, Ball]) 70 | self.grid.set(1, height // 2 - 
1, start_room_obj('green')) 71 | 72 | other_objs = self._rand_elem([[Ball, Key], [Key, Ball]]) 73 | pos0 = (hallway_end + 1, height // 2 - 2) 74 | pos1 = (hallway_end + 1, height // 2 + 2) 75 | self.grid.set(*pos0, other_objs[0]('green')) 76 | self.grid.set(*pos1, other_objs[1]('green')) 77 | 78 | # Choose the target objects 79 | if start_room_obj == other_objs[0]: 80 | self.success_pos = (pos0[0], pos0[1] + 1) 81 | self.failure_pos = (pos1[0], pos1[1] - 1) 82 | else: 83 | self.success_pos = (pos1[0], pos1[1] - 1) 84 | self.failure_pos = (pos0[0], pos0[1] + 1) 85 | 86 | self.mission = 'go to the matching object at the end of the hallway' 87 | 88 | def step(self, action): 89 | if action == MiniGridEnv.Actions.pickup: 90 | action = MiniGridEnv.Actions.toggle 91 | obs, reward, done, info = MiniGridEnv.step(self, action) 92 | 93 | if tuple(self.agent_pos) == self.success_pos: 94 | reward = self._reward() 95 | done = True 96 | if tuple(self.agent_pos) == self.failure_pos: 97 | reward = 0 98 | done = True 99 | 100 | return obs, reward, done, info 101 | 102 | class MemoryS17Random(MemoryEnv): 103 | def __init__(self, seed=None): 104 | super().__init__(seed=seed, size=17, random_length=True) 105 | 106 | register( 107 | id='MiniGrid-MemoryS17Random-v0', 108 | entry_point='gym_minigrid.envs:MemoryS17Random', 109 | ) 110 | 111 | class MemoryS13Random(MemoryEnv): 112 | def __init__(self, seed=None): 113 | super().__init__(seed=seed, size=13, random_length=True) 114 | 115 | register( 116 | id='MiniGrid-MemoryS13Random-v0', 117 | entry_point='gym_minigrid.envs:MemoryS13Random', 118 | ) 119 | 120 | class MemoryS13(MemoryEnv): 121 | def __init__(self, seed=None): 122 | super().__init__(seed=seed, size=13) 123 | 124 | register( 125 | id='MiniGrid-MemoryS13-v0', 126 | entry_point='gym_minigrid.envs:MemoryS13', 127 | ) 128 | 129 | class MemoryS11(MemoryEnv): 130 | def __init__(self, seed=None): 131 | super().__init__(seed=seed, size=11) 132 | 133 | register( 134 | 
class CrossingEnv(MiniGridEnv):
    """
    Environment with wall or lava obstacles, sparse reward.
    """

    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None):
        """
        :param size: side length of the square grid (must be odd, see
            the assertion in ``_gen_grid``)
        :param num_crossings: number of obstacle "rivers" to draw
        :param obstacle_type: class instantiated for every obstacle cell
        :param seed: random seed forwarded to the parent environment
        """
        self.num_crossings = num_crossings
        self.obstacle_type = obstacle_type
        super().__init__(
            grid_size=size,
            max_steps=4*size*size,
            # Set this to True for maximum speed
            see_through_walls=False,
            # Forward the caller's seed; it used to be dropped in favour
            # of a hard-coded None.
            seed=seed
        )

    def _gen_grid(self, width, height):
        """
        Build the grid: outer walls, agent at (1, 1), goal in the
        bottom-right corner, ``num_crossings`` randomly chosen rivers of
        obstacles, and one opening per river along a random path.
        """
        assert width % 2 == 1 and height % 2 == 1  # odd size

        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place the agent in the top-left corner
        self.agent_pos = (1, 1)
        self.agent_dir = 0

        # Place a goal square in the bottom-right corner
        self.grid.set(width - 2, height - 2, Goal())

        # Place obstacles (lava or walls)
        v, h = object(), object()  # singleton `vertical` and `horizontal` objects

        # Lava rivers or walls specified by direction and position in grid
        rivers = [(v, i) for i in range(2, height - 2, 2)]
        rivers += [(h, j) for j in range(2, width - 2, 2)]
        self.np_random.shuffle(rivers)
        rivers = rivers[:self.num_crossings]  # sample random rivers
        rivers_v = sorted([pos for direction, pos in rivers if direction is v])
        rivers_h = sorted([pos for direction, pos in rivers if direction is h])
        obstacle_pos = itt.chain(
            itt.product(range(1, width - 1), rivers_h),
            itt.product(rivers_v, range(1, height - 1)),
        )
        for i, j in obstacle_pos:
            self.grid.set(i, j, self.obstacle_type())

        # Sample path to goal: one step per river, in random order
        path = [h] * len(rivers_v) + [v] * len(rivers_h)
        self.np_random.shuffle(path)

        # Create openings: walk the path room by room and punch a hole
        # in the river separating the current room from the next one
        limits_v = [0] + rivers_v + [height - 1]
        limits_h = [0] + rivers_h + [width - 1]
        room_i, room_j = 0, 0
        for direction in path:
            if direction is h:
                i = limits_v[room_i + 1]
                j = self.np_random.choice(
                    range(limits_h[room_j] + 1, limits_h[room_j + 1]))
                room_i += 1
            elif direction is v:
                i = self.np_random.choice(
                    range(limits_v[room_i] + 1, limits_v[room_i + 1]))
                j = limits_h[room_j + 1]
                room_j += 1
            else:
                assert False
            self.grid.set(i, j, None)

        self.mission = (
            "avoid the lava and get to the green goal square"
            if self.obstacle_type == Lava
            else "find the opening and get to the green goal square"
        )
entry_point='gym_minigrid.envs:LavaCrossingEnv' 104 | ) 105 | 106 | register( 107 | id='MiniGrid-LavaCrossingS9N2-v0', 108 | entry_point='gym_minigrid.envs:LavaCrossingS9N2Env' 109 | ) 110 | 111 | register( 112 | id='MiniGrid-LavaCrossingS9N3-v0', 113 | entry_point='gym_minigrid.envs:LavaCrossingS9N3Env' 114 | ) 115 | 116 | register( 117 | id='MiniGrid-LavaCrossingS11N5-v0', 118 | entry_point='gym_minigrid.envs:LavaCrossingS11N5Env' 119 | ) 120 | 121 | class SimpleCrossingEnv(CrossingEnv): 122 | def __init__(self): 123 | super().__init__(size=9, num_crossings=1, obstacle_type=Wall) 124 | 125 | class SimpleCrossingS9N2Env(CrossingEnv): 126 | def __init__(self): 127 | super().__init__(size=9, num_crossings=2, obstacle_type=Wall) 128 | 129 | class SimpleCrossingS9N3Env(CrossingEnv): 130 | def __init__(self): 131 | super().__init__(size=9, num_crossings=3, obstacle_type=Wall) 132 | 133 | class SimpleCrossingS11N5Env(CrossingEnv): 134 | def __init__(self): 135 | super().__init__(size=11, num_crossings=5, obstacle_type=Wall) 136 | 137 | register( 138 | id='MiniGrid-SimpleCrossingS9N1-v0', 139 | entry_point='gym_minigrid.envs:SimpleCrossingEnv' 140 | ) 141 | 142 | register( 143 | id='MiniGrid-SimpleCrossingS9N2-v0', 144 | entry_point='gym_minigrid.envs:SimpleCrossingS9N2Env' 145 | ) 146 | 147 | register( 148 | id='MiniGrid-SimpleCrossingS9N3-v0', 149 | entry_point='gym_minigrid.envs:SimpleCrossingS9N3Env' 150 | ) 151 | 152 | register( 153 | id='MiniGrid-SimpleCrossingS11N5-v0', 154 | entry_point='gym_minigrid.envs:SimpleCrossingS11N5Env' 155 | ) 156 | -------------------------------------------------------------------------------- /gym_minigrid/rendering.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PyQt5.QtCore import Qt 3 | from PyQt5.QtGui import QImage, QPixmap, QPainter, QColor, QPolygon 4 | from PyQt5.QtCore import QPoint, QSize, QRect 5 | from PyQt5.QtWidgets import QApplication, QMainWindow, 
class Window(QMainWindow):
    """
    Simple application window to render the environment into
    """

    # (Qt key code, name handed to the key-down callback) pairs.
    # Keys that are not listed here are ignored by keyPressEvent.
    _KEY_NAMES = (
        (Qt.Key_Left, 'LEFT'),
        (Qt.Key_Right, 'RIGHT'),
        (Qt.Key_Up, 'UP'),
        (Qt.Key_Down, 'DOWN'),
        (Qt.Key_Space, 'SPACE'),
        (Qt.Key_Return, 'RETURN'),
        (Qt.Key_Alt, 'ALT'),
        (Qt.Key_Control, 'CTRL'),
        (Qt.Key_PageUp, 'PAGE_UP'),
        (Qt.Key_PageDown, 'PAGE_DOWN'),
        (Qt.Key_Backspace, 'BACKSPACE'),
        (Qt.Key_Escape, 'ESCAPE'),
    )

    def __init__(self):
        super().__init__()

        self.setWindowTitle('MiniGrid Gym Environment')

        # Image label to display the rendering
        self.imgLabel = QLabel()
        self.imgLabel.setFrameStyle(QFrame.Panel | QFrame.Sunken)

        # Text box for the mission
        self.missionBox = QTextEdit()
        self.missionBox.setReadOnly(True)
        self.missionBox.setMinimumSize(400, 100)

        # Center the image
        hbox = QHBoxLayout()
        hbox.addStretch(1)
        hbox.addWidget(self.imgLabel)
        hbox.addStretch(1)

        # Arrange widgets vertically
        vbox = QVBoxLayout()
        vbox.addLayout(hbox)
        vbox.addWidget(self.missionBox)

        # Create a main widget for the window
        mainWidget = QWidget(self)
        self.setCentralWidget(mainWidget)
        mainWidget.setLayout(vbox)

        # Show the application window
        self.show()
        self.setFocus()

        # Set to True by closeEvent once the window has been closed
        self.closed = False

        # Callback for keyboard events
        self.keyDownCb = None

    def closeEvent(self, event):
        """Record that the window was closed."""
        self.closed = True

    def setPixmap(self, pixmap):
        """Display the given pixmap in the image label."""
        self.imgLabel.setPixmap(pixmap)

    def setText(self, text):
        """Replace the contents of the mission text box."""
        self.missionBox.setPlainText(text)

    def setKeyDownCb(self, callback):
        """Register the callable invoked with a key name on key press."""
        self.keyDownCb = callback

    def keyPressEvent(self, e):
        """
        Translate a Qt key event into a key name and forward it to the
        registered callback. Does nothing when no callback is set or
        when the key has no entry in ``_KEY_NAMES``.
        """
        if self.keyDownCb is None:
            return

        pressed = e.key()
        for code, keyName in self._KEY_NAMES:
            if pressed == code:
                self.keyDownCb(keyName)
                return
141 | The array will have shape (height, width, 3) 142 | """ 143 | 144 | numBytes = self.width * self.height * 3 145 | buf = self.img.bits().asstring(numBytes) 146 | output = np.frombuffer(buf, dtype='uint8') 147 | output = output.reshape((self.height, self.width, 3)) 148 | 149 | return output 150 | 151 | def push(self): 152 | self.painter.save() 153 | 154 | def pop(self): 155 | self.painter.restore() 156 | 157 | def rotate(self, degrees): 158 | self.painter.rotate(degrees) 159 | 160 | def translate(self, x, y): 161 | self.painter.translate(x, y) 162 | 163 | def scale(self, x, y): 164 | self.painter.scale(x, y) 165 | 166 | def setLineColor(self, r, g, b, a=255): 167 | self.painter.setPen(QColor(r, g, b, a)) 168 | 169 | def setColor(self, r, g, b, a=255): 170 | self.painter.setBrush(QColor(r, g, b, a)) 171 | 172 | def setLineWidth(self, width): 173 | pen = self.painter.pen() 174 | pen.setWidthF(width) 175 | self.painter.setPen(pen) 176 | 177 | def drawLine(self, x0, y0, x1, y1): 178 | self.painter.drawLine(x0, y0, x1, y1) 179 | 180 | def drawCircle(self, x, y, r): 181 | center = QPoint(x, y) 182 | self.painter.drawEllipse(center, r, r) 183 | 184 | def drawPolygon(self, points): 185 | """Takes a list of points (tuples) as input""" 186 | points = map(lambda p: QPoint(p[0], p[1]), points) 187 | self.painter.drawPolygon(QPolygon(points)) 188 | 189 | def drawPolyline(self, points): 190 | """Takes a list of points (tuples) as input""" 191 | points = map(lambda p: QPoint(p[0], p[1]), points) 192 | self.painter.drawPolyline(QPolygon(points)) 193 | 194 | def fillRect(self, x, y, width, height, r, g, b, a=255): 195 | self.painter.fillRect(QRect(x, y, width, height), QColor(r, g, b, a)) 196 | -------------------------------------------------------------------------------- /gym_minigrid/envs/obstructedmaze.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.roomgrid import RoomGrid 3 
class ObstructedMazeEnv(RoomGrid):
    """
    A blue ball is hidden in the maze. Doors may be locked,
    doors may be obstructed by a ball and keys may be hidden in boxes.
    """

    def __init__(self,
        num_rows,
        num_cols,
        num_rooms_visited,
        seed=None
    ):
        """
        :param num_rows: number of room rows in the maze
        :param num_cols: number of room columns in the maze
        :param num_rooms_visited: scales the step budget, which is
            ``4 * num_rooms_visited * room_size**2``
        :param seed: random seed forwarded to ``RoomGrid``
        """
        room_size = 6
        max_steps = 4*num_rooms_visited*room_size**2

        super().__init__(
            room_size=room_size,
            num_rows=num_rows,
            num_cols=num_cols,
            max_steps=max_steps,
            seed=seed
        )

    def _gen_grid(self, width, height):
        """Generate the room grid and pick the colors used by subclasses."""
        super()._gen_grid(width, height)

        # Define all possible colors for doors
        self.door_colors = self._rand_subset(COLOR_NAMES, len(COLOR_NAMES))
        # Define the color of the ball to pick up
        self.ball_to_find_color = COLOR_NAMES[0]
        # Define the color of the balls that obstruct doors
        self.blocking_ball_color = COLOR_NAMES[1]
        # Define the color of boxes in which keys are hidden
        self.box_color = COLOR_NAMES[2]

        self.mission = "pick up the %s ball" % self.ball_to_find_color

    def step(self, action):
        """
        Perform the action and end the episode with the standard reward
        once the agent picks up the target object ``self.obj``.
        """
        obs, reward, done, info = super().step(action)

        if action == self.actions.pickup:
            if self.carrying and self.carrying == self.obj:
                reward = self._reward()
                done = True

        return obs, reward, done, info

    def add_door(self, i, j, door_idx=0, color=None, locked=False, key_in_box=False, blocked=False):
        """
        Add a door. If the door must be locked, it also adds the key.
        If the key must be hidden, it is put in a box. If the door must
        be obstructed, it adds a ball in front of the door.

        :param i: first room coordinate, passed on to ``RoomGrid.add_door``
        :param j: second room coordinate, passed on to ``RoomGrid.add_door``
        :param door_idx: which wall of the room gets the door; also used
            to index ``DIR_TO_VEC`` when placing a blocking ball
        :param color: door color, passed on to ``RoomGrid.add_door``
        :param locked: lock the door and place its key in room (i, j)
        :param key_in_box: wrap the key in a box (only used when locked)
        :param blocked: put a ball on the cell in front of the door
        :return: ``(door, door_pos)`` as returned by ``RoomGrid.add_door``
        """

        door, door_pos = super().add_door(i, j, door_idx, color, locked=locked)

        if blocked:
            # The ball goes one cell back from the door, against the
            # direction the door faces
            vec = DIR_TO_VEC[door_idx]
            blocking_ball = Ball(self.blocking_ball_color)
            self.grid.set(door_pos[0]-vec[0], door_pos[1]-vec[1], blocking_ball)

        if locked:
            obj = Key(door.color)
            if key_in_box:
                box = Box(self.box_color)
                box.contains = obj
                obj = box
            self.place_in_room(i, j, obj)

        return door, door_pos
117 | """ 118 | 119 | def __init__(self, agent_room=(1, 1), key_in_box=True, blocked=True, 120 | num_quarters=4, num_rooms_visited=25, seed=None): 121 | self.agent_room = agent_room 122 | self.key_in_box = key_in_box 123 | self.blocked = blocked 124 | self.num_quarters = num_quarters 125 | 126 | super().__init__( 127 | num_rows=3, 128 | num_cols=3, 129 | num_rooms_visited=num_rooms_visited, 130 | seed=seed 131 | ) 132 | 133 | def _gen_grid(self, width, height): 134 | super()._gen_grid(width, height) 135 | 136 | middle_room = (1, 1) 137 | # Define positions of "side rooms" i.e. rooms that are neither 138 | # corners nor the center. 139 | side_rooms = [(2, 1), (1, 2), (0, 1), (1, 0)][:self.num_quarters] 140 | for i in range(len(side_rooms)): 141 | side_room = side_rooms[i] 142 | 143 | # Add a door between the center room and the side room 144 | self.add_door(*middle_room, door_idx=i, color=self.door_colors[i], locked=False) 145 | 146 | for k in [-1, 1]: 147 | # Add a door to each side of the side room 148 | self.add_door(*side_room, locked=True, 149 | door_idx=(i+k)%4, 150 | color=self.door_colors[(i+k)%len(self.door_colors)], 151 | key_in_box=self.key_in_box, 152 | blocked=self.blocked) 153 | 154 | corners = [(2, 0), (2, 2), (0, 2), (0, 0)][:self.num_quarters] 155 | ball_room = self._rand_elem(corners) 156 | 157 | self.obj, _ = self.add_object(*ball_room, "ball", color=self.ball_to_find_color) 158 | self.place_agent(*self.agent_room) 159 | 160 | class ObstructedMaze_2Dl(ObstructedMaze_Full): 161 | def __init__(self, seed=None): 162 | super().__init__((2, 1), False, False, 1, 4, seed) 163 | 164 | class ObstructedMaze_2Dlh(ObstructedMaze_Full): 165 | def __init__(self, seed=None): 166 | super().__init__((2, 1), True, False, 1, 4, seed) 167 | 168 | 169 | class ObstructedMaze_2Dlhb(ObstructedMaze_Full): 170 | def __init__(self, seed=None): 171 | super().__init__((2, 1), True, True, 1, 4, seed) 172 | 173 | class ObstructedMaze_1Q(ObstructedMaze_Full): 174 | def 
class ReseedWrapper(gym.core.Wrapper):
    """
    Wrapper to always regenerate an environment with the same set of seeds.
    This can be used to force an environment to always keep the same
    configuration when reset.
    """

    def __init__(self, env, seeds=(0,), seed_idx=0):
        """
        :param env: environment to wrap
        :param seeds: non-empty iterable of seeds, cycled through on
            successive resets
        :param seed_idx: index into ``seeds`` used for the first reset
        :raises ValueError: if ``seeds`` is empty
        """
        # Copy so that later changes to the caller's sequence have no
        # effect (the default is immutable for the same reason)
        self.seeds = list(seeds)
        if not self.seeds:
            # Fail here rather than with an IndexError on the first reset
            raise ValueError('ReseedWrapper requires at least one seed')
        self.seed_idx = seed_idx
        super().__init__(env)

    def reset(self, **kwargs):
        """Seed the wrapped env with the next seed in the cycle, then reset it."""
        seed = self.seeds[self.seed_idx]
        self.seed_idx = (self.seed_idx + 1) % len(self.seeds)
        self.env.seed(seed)
        return self.env.reset(**kwargs)

    def step(self, action):
        """Forward the step to the wrapped environment unchanged."""
        obs, reward, done, info = self.env.step(action)
        return obs, reward, done, info
class ImgObsWrapper(gym.core.ObservationWrapper):
    """
    Reduce each observation to its image, dropping the language/mission part.
    """

    def __init__(self, env):
        super().__init__(env)

        # Advertise only the 'image' sub-space of the wrapped environment
        image_space = env.observation_space.spaces['image']
        self.observation_space = image_space

    def observation(self, obs):
        image = obs['image']
        return image
class FlatObsWrapper(gym.core.ObservationWrapper):
    """
    Encode mission strings using a one-hot scheme,
    and combine these with observed images into one flat array
    """

    def __init__(self, env, maxStrLen=96):
        """
        :param env: environment to wrap; its observation space must have
            an 'image' entry
        :param maxStrLen: maximum mission length in characters; longer
            missions trip an assertion in ``observation``
        """
        super().__init__(env)

        self.maxStrLen = maxStrLen
        # 26 letters (a-z) plus the space character
        self.numCharCodes = 27

        imgSpace = env.observation_space.spaces['image']
        imgSize = reduce(operator.mul, imgSpace.shape, 1)

        # NOTE(review): the declared shape has a leading axis of 1 while
        # observation() returns a 1-D array -- left as is because callers
        # may size models from this space; confirm which one is intended.
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(1, imgSize + self.numCharCodes * self.maxStrLen),
            dtype='uint8'
        )

        # Last mission string seen (as received) and its encoding
        self.cachedStr = None
        self.cachedArray = None

    def observation(self, obs):
        """
        Return the flattened image concatenated with the flattened
        one-hot encoding of the mission string.

        Letters are encoded case-insensitively and space gets its own
        code. Any other character has no code and is left as an all-zero
        row, the same as the padding after the end of the string.
        """
        image = obs['image']
        mission = obs['mission']

        # Cache the last-encoded mission string. The cache is keyed on the
        # string as received: keying on the lowercased form meant missions
        # containing uppercase letters were re-encoded on every call.
        if mission != self.cachedStr:
            assert len(mission) <= self.maxStrLen, 'mission string too long ({} chars)'.format(len(mission))

            strArray = np.zeros(shape=(self.maxStrLen, self.numCharCodes), dtype='float32')

            for idx, ch in enumerate(mission.lower()):
                if ch >= 'a' and ch <= 'z':
                    chNo = ord(ch) - ord('a')
                elif ch == ' ':
                    chNo = ord('z') - ord('a') + 1
                else:
                    # No code for this character. Previously chNo was
                    # either unbound here or silently reused from the
                    # preceding character.
                    continue
                assert chNo < self.numCharCodes, '%s : %d' % (ch, chNo)
                strArray[idx, chNo] = 1

            self.cachedStr = mission
            self.cachedArray = strArray

        obs = np.concatenate((image.flatten(), self.cachedArray.flatten()))

        return obs
class Room:
    """
    Plain record describing one room: where it sits, how large it is,
    and the positions of its entry and exit doors.
    """

    def __init__(self, top, size, entryDoorPos, exitDoorPos):
        # Position and extent of the room
        self.top, self.size = top, size
        # Door positions; the values are stored exactly as given
        self.entryDoorPos, self.exitDoorPos = entryDoorPos, exitDoorPos
numRooms: 48 | curRoomList = [] 49 | 50 | entryDoorPos = ( 51 | self._rand_int(0, width - 2), 52 | self._rand_int(0, width - 2) 53 | ) 54 | 55 | # Recursively place the rooms 56 | self._placeRoom( 57 | numRooms, 58 | roomList=curRoomList, 59 | minSz=4, 60 | maxSz=self.maxRoomSize, 61 | entryDoorWall=2, 62 | entryDoorPos=entryDoorPos 63 | ) 64 | 65 | if len(curRoomList) > len(roomList): 66 | roomList = curRoomList 67 | 68 | # Store the list of rooms in this environment 69 | assert len(roomList) > 0 70 | self.rooms = roomList 71 | 72 | # Create the grid 73 | self.grid = Grid(width, height) 74 | wall = Wall() 75 | 76 | prevDoorColor = None 77 | 78 | # For each room 79 | for idx, room in enumerate(roomList): 80 | 81 | topX, topY = room.top 82 | sizeX, sizeY = room.size 83 | 84 | # Draw the top and bottom walls 85 | for i in range(0, sizeX): 86 | self.grid.set(topX + i, topY, wall) 87 | self.grid.set(topX + i, topY + sizeY - 1, wall) 88 | 89 | # Draw the left and right walls 90 | for j in range(0, sizeY): 91 | self.grid.set(topX, topY + j, wall) 92 | self.grid.set(topX + sizeX - 1, topY + j, wall) 93 | 94 | # If this isn't the first room, place the entry door 95 | if idx > 0: 96 | # Pick a door color different from the previous one 97 | doorColors = set(COLOR_NAMES) 98 | if prevDoorColor: 99 | doorColors.remove(prevDoorColor) 100 | # Note: the use of sorting here guarantees determinism, 101 | # This is needed because Python's set is not deterministic 102 | doorColor = self._rand_elem(sorted(doorColors)) 103 | 104 | entryDoor = Door(doorColor) 105 | self.grid.set(*room.entryDoorPos, entryDoor) 106 | prevDoorColor = doorColor 107 | 108 | prevRoom = roomList[idx-1] 109 | prevRoom.exitDoorPos = room.entryDoorPos 110 | 111 | # Randomize the starting agent position and direction 112 | self.place_agent(roomList[0].top, roomList[0].size) 113 | 114 | # Place the final goal in the last room 115 | self.goal_pos = self.place_obj(Goal(), roomList[-1].top, roomList[-1].size) 116 | 
117 | self.mission = 'traverse the rooms to get to the goal' 118 | 119 | def _placeRoom( 120 | self, 121 | numLeft, 122 | roomList, 123 | minSz, 124 | maxSz, 125 | entryDoorWall, 126 | entryDoorPos 127 | ): 128 | # Choose the room size randomly 129 | sizeX = self._rand_int(minSz, maxSz+1) 130 | sizeY = self._rand_int(minSz, maxSz+1) 131 | 132 | # The first room will be at the door position 133 | if len(roomList) == 0: 134 | topX, topY = entryDoorPos 135 | # Entry on the right 136 | elif entryDoorWall == 0: 137 | topX = entryDoorPos[0] - sizeX + 1 138 | y = entryDoorPos[1] 139 | topY = self._rand_int(y - sizeY + 2, y) 140 | # Entry wall on the south 141 | elif entryDoorWall == 1: 142 | x = entryDoorPos[0] 143 | topX = self._rand_int(x - sizeX + 2, x) 144 | topY = entryDoorPos[1] - sizeY + 1 145 | # Entry wall on the left 146 | elif entryDoorWall == 2: 147 | topX = entryDoorPos[0] 148 | y = entryDoorPos[1] 149 | topY = self._rand_int(y - sizeY + 2, y) 150 | # Entry wall on the top 151 | elif entryDoorWall == 3: 152 | x = entryDoorPos[0] 153 | topX = self._rand_int(x - sizeX + 2, x) 154 | topY = entryDoorPos[1] 155 | else: 156 | assert False, entryDoorWall 157 | 158 | # If the room is out of the grid, can't place a room here 159 | if topX < 0 or topY < 0: 160 | return False 161 | if topX + sizeX > self.width or topY + sizeY >= self.height: 162 | return False 163 | 164 | # If the room intersects with previous rooms, can't place it here 165 | for room in roomList[:-1]: 166 | nonOverlap = \ 167 | topX + sizeX < room.top[0] or \ 168 | room.top[0] + room.size[0] <= topX or \ 169 | topY + sizeY < room.top[1] or \ 170 | room.top[1] + room.size[1] <= topY 171 | 172 | if not nonOverlap: 173 | return False 174 | 175 | # Add this room to the list 176 | roomList.append(Room( 177 | (topX, topY), 178 | (sizeX, sizeY), 179 | entryDoorPos, 180 | None 181 | )) 182 | 183 | # If this was the last room, stop 184 | if numLeft == 1: 185 | return True 186 | 187 | # Try placing the next room 
188 | for i in range(0, 8): 189 | 190 | # Pick which wall to place the out door on 191 | wallSet = set((0, 1, 2, 3)) 192 | wallSet.remove(entryDoorWall) 193 | exitDoorWall = self._rand_elem(sorted(wallSet)) 194 | nextEntryWall = (exitDoorWall + 2) % 4 195 | 196 | # Pick the exit door position 197 | # Exit on right wall 198 | if exitDoorWall == 0: 199 | exitDoorPos = ( 200 | topX + sizeX - 1, 201 | topY + self._rand_int(1, sizeY - 1) 202 | ) 203 | # Exit on south wall 204 | elif exitDoorWall == 1: 205 | exitDoorPos = ( 206 | topX + self._rand_int(1, sizeX - 1), 207 | topY + sizeY - 1 208 | ) 209 | # Exit on left wall 210 | elif exitDoorWall == 2: 211 | exitDoorPos = ( 212 | topX, 213 | topY + self._rand_int(1, sizeY - 1) 214 | ) 215 | # Exit on north wall 216 | elif exitDoorWall == 3: 217 | exitDoorPos = ( 218 | topX + self._rand_int(1, sizeX - 1), 219 | topY 220 | ) 221 | else: 222 | assert False 223 | 224 | # Recursively create the other rooms 225 | success = self._placeRoom( 226 | numLeft - 1, 227 | roomList=roomList, 228 | minSz=minSz, 229 | maxSz=maxSz, 230 | entryDoorWall=nextEntryWall, 231 | entryDoorPos=exitDoorPos 232 | ) 233 | 234 | if success: 235 | break 236 | 237 | return True 238 | 239 | class MultiRoomEnvN2S4(MultiRoomEnv): 240 | def __init__(self): 241 | super().__init__( 242 | minNumRooms=2, 243 | maxNumRooms=2, 244 | maxRoomSize=4 245 | ) 246 | 247 | class MultiRoomEnvN4S5(MultiRoomEnv): 248 | def __init__(self): 249 | super().__init__( 250 | minNumRooms=4, 251 | maxNumRooms=4, 252 | maxRoomSize=5 253 | ) 254 | 255 | class MultiRoomEnvN6(MultiRoomEnv): 256 | def __init__(self): 257 | super().__init__( 258 | minNumRooms=6, 259 | maxNumRooms=6 260 | ) 261 | 262 | register( 263 | id='MiniGrid-MultiRoom-N2-S4-v0', 264 | entry_point='gym_minigrid.envs:MultiRoomEnvN2S4' 265 | ) 266 | 267 | register( 268 | id='MiniGrid-MultiRoom-N4-S5-v0', 269 | entry_point='gym_minigrid.envs:MultiRoomEnvN4S5' 270 | ) 271 | 272 | register( 273 | 
id='MiniGrid-MultiRoom-N6-v0', 274 | entry_point='gym_minigrid.envs:MultiRoomEnvN6' 275 | ) 276 | -------------------------------------------------------------------------------- /gym_minigrid/roomgrid.py: -------------------------------------------------------------------------------- 1 | from .minigrid import * 2 | 3 | def reject_next_to(env, pos): 4 | """ 5 | Function to filter out object positions that are right next to 6 | the agent's starting point 7 | """ 8 | 9 | sx, sy = env.agent_pos 10 | x, y = pos 11 | d = abs(sx - x) + abs(sy - y) 12 | return d < 2 13 | 14 | class Room: 15 | def __init__( 16 | self, 17 | top, 18 | size 19 | ): 20 | # Top-left corner and size (tuples) 21 | self.top = top 22 | self.size = size 23 | 24 | # List of door objects and door positions 25 | # Order of the doors is right, down, left, up 26 | self.doors = [None] * 4 27 | self.door_pos = [None] * 4 28 | 29 | # List of rooms adjacent to this one 30 | # Order of the neighbors is right, down, left, up 31 | self.neighbors = [None] * 4 32 | 33 | # Indicates if this room is behind a locked door 34 | self.locked = False 35 | 36 | # List of objects contained 37 | self.objs = [] 38 | 39 | def rand_pos(self, env): 40 | topX, topY = self.top 41 | sizeX, sizeY = self.size 42 | return env._randPos( 43 | topX + 1, topX + sizeX - 1, 44 | topY + 1, topY + sizeY - 1 45 | ) 46 | 47 | def pos_inside(self, x, y): 48 | """ 49 | Check if a position is within the bounds of this room 50 | """ 51 | 52 | topX, topY = self.top 53 | sizeX, sizeY = self.size 54 | 55 | if x < topX or y < topY: 56 | return False 57 | 58 | if x >= topX + sizeX or y >= topY + sizeY: 59 | return False 60 | 61 | return True 62 | 63 | class RoomGrid(MiniGridEnv): 64 | """ 65 | Environment with multiple rooms and random objects. 66 | This is meant to serve as a base class for other environments. 
67 | """ 68 | 69 | def __init__( 70 | self, 71 | room_size=7, 72 | num_rows=3, 73 | num_cols=3, 74 | max_steps=100, 75 | seed=0 76 | ): 77 | assert room_size > 0 78 | assert room_size >= 3 79 | assert num_rows > 0 80 | assert num_cols > 0 81 | self.room_size = room_size 82 | self.num_rows = num_rows 83 | self.num_cols = num_cols 84 | 85 | height = (room_size - 1) * num_rows + 1 86 | width = (room_size - 1) * num_cols + 1 87 | 88 | # By default, this environment has no mission 89 | self.mission = '' 90 | 91 | super().__init__( 92 | width=width, 93 | height=height, 94 | max_steps=max_steps, 95 | see_through_walls=False, 96 | seed=seed 97 | ) 98 | 99 | def room_from_pos(self, x, y): 100 | """Get the room a given position maps to""" 101 | 102 | assert x >= 0 103 | assert y >= 0 104 | 105 | i = x // (self.room_size-1) 106 | j = y // (self.room_size-1) 107 | 108 | assert i < self.num_cols 109 | assert j < self.num_rows 110 | 111 | return self.room_grid[j][i] 112 | 113 | def get_room(self, i, j): 114 | assert i < self.num_cols 115 | assert j < self.num_rows 116 | return self.room_grid[j][i] 117 | 118 | def _gen_grid(self, width, height): 119 | # Create the grid 120 | self.grid = Grid(width, height) 121 | 122 | self.room_grid = [] 123 | 124 | # For each row of rooms 125 | for j in range(0, self.num_rows): 126 | row = [] 127 | 128 | # For each column of rooms 129 | for i in range(0, self.num_cols): 130 | room = Room( 131 | (i * (self.room_size-1), j * (self.room_size-1)), 132 | (self.room_size, self.room_size) 133 | ) 134 | row.append(room) 135 | 136 | # Generate the walls for this room 137 | self.grid.wall_rect(*room.top, *room.size) 138 | 139 | self.room_grid.append(row) 140 | 141 | # For each row of rooms 142 | for j in range(0, self.num_rows): 143 | # For each column of rooms 144 | for i in range(0, self.num_cols): 145 | room = self.room_grid[j][i] 146 | 147 | x_l, y_l = (room.top[0] + 1, room.top[1] + 1) 148 | x_m, y_m = (room.top[0] + room.size[0] - 1, room.top[1] + 
room.size[1] - 1) 149 | 150 | # Door positions, order is right, down, left, up 151 | if i < self.num_cols - 1: 152 | room.neighbors[0] = self.room_grid[j][i+1] 153 | room.door_pos[0] = (x_m, self._rand_int(y_l, y_m)) 154 | if j < self.num_rows - 1: 155 | room.neighbors[1] = self.room_grid[j+1][i] 156 | room.door_pos[1] = (self._rand_int(x_l, x_m), y_m) 157 | if i > 0: 158 | room.neighbors[2] = self.room_grid[j][i-1] 159 | room.door_pos[2] = room.neighbors[2].door_pos[0] 160 | if j > 0: 161 | room.neighbors[3] = self.room_grid[j-1][i] 162 | room.door_pos[3] = room.neighbors[3].door_pos[1] 163 | 164 | # The agent starts in the middle, facing right 165 | self.agent_pos = ( 166 | (self.num_cols // 2) * (self.room_size-1) + (self.room_size // 2), 167 | (self.num_rows // 2) * (self.room_size-1) + (self.room_size // 2) 168 | ) 169 | self.agent_dir = 0 170 | 171 | def place_in_room(self, i, j, obj): 172 | """ 173 | Add an existing object to room (i, j) 174 | """ 175 | 176 | room = self.get_room(i, j) 177 | 178 | pos = self.place_obj( 179 | obj, 180 | room.top, 181 | room.size, 182 | reject_fn=reject_next_to, 183 | max_tries=1000 184 | ) 185 | 186 | room.objs.append(obj) 187 | 188 | return obj, pos 189 | 190 | def add_object(self, i, j, kind=None, color=None): 191 | """ 192 | Add a new object to room (i, j) 193 | """ 194 | 195 | if kind == None: 196 | kind = self._rand_elem(['key', 'ball', 'box']) 197 | 198 | if color == None: 199 | color = self._rand_color() 200 | 201 | # TODO: we probably want to add an Object.make helper function 202 | assert kind in ['key', 'ball', 'box'] 203 | if kind == 'key': 204 | obj = Key(color) 205 | elif kind == 'ball': 206 | obj = Ball(color) 207 | elif kind == 'box': 208 | obj = Box(color) 209 | 210 | return self.place_in_room(i, j, obj) 211 | 212 | def add_door(self, i, j, door_idx=None, color=None, locked=None): 213 | """ 214 | Add a door to a room, connecting it to a neighbor 215 | """ 216 | 217 | room = self.get_room(i, j) 218 | 219 | if 
door_idx == None: 220 | # Need to make sure that there is a neighbor along this wall 221 | # and that there is not already a door 222 | while True: 223 | door_idx = self._rand_int(0, 4) 224 | if room.neighbors[door_idx] and room.doors[door_idx] is None: 225 | break 226 | 227 | if color == None: 228 | color = self._rand_color() 229 | 230 | if locked is None: 231 | locked = self._rand_bool() 232 | 233 | assert room.doors[door_idx] is None, "door already exists" 234 | 235 | room.locked = locked 236 | door = Door(color, is_locked=locked) 237 | 238 | pos = room.door_pos[door_idx] 239 | self.grid.set(*pos, door) 240 | door.cur_pos = pos 241 | 242 | neighbor = room.neighbors[door_idx] 243 | room.doors[door_idx] = door 244 | neighbor.doors[(door_idx+2) % 4] = door 245 | 246 | return door, pos 247 | 248 | def remove_wall(self, i, j, wall_idx): 249 | """ 250 | Remove a wall between two rooms 251 | """ 252 | 253 | room = self.get_room(i, j) 254 | 255 | assert wall_idx >= 0 and wall_idx < 4 256 | assert room.doors[wall_idx] is None, "door exists on this wall" 257 | assert room.neighbors[wall_idx], "invalid wall" 258 | 259 | neighbor = room.neighbors[wall_idx] 260 | 261 | tx, ty = room.top 262 | w, h = room.size 263 | 264 | # Ordering of walls is right, down, left, up 265 | if wall_idx == 0: 266 | for i in range(1, h - 1): 267 | self.grid.set(tx + w - 1, ty + i, None) 268 | elif wall_idx == 1: 269 | for i in range(1, w - 1): 270 | self.grid.set(tx + i, ty + h - 1, None) 271 | elif wall_idx == 2: 272 | for i in range(1, h - 1): 273 | self.grid.set(tx, ty + i, None) 274 | elif wall_idx == 3: 275 | for i in range(1, w - 1): 276 | self.grid.set(tx + i, ty, None) 277 | else: 278 | assert False, "invalid wall index" 279 | 280 | # Mark the rooms as connected 281 | room.doors[wall_idx] = True 282 | neighbor.doors[(wall_idx+2) % 4] = True 283 | 284 | def place_agent(self, i=None, j=None, rand_dir=True): 285 | """ 286 | Place the agent in a room 287 | """ 288 | 289 | if i == None: 290 | 
i = self._rand_int(0, self.num_cols) 291 | if j == None: 292 | j = self._rand_int(0, self.num_rows) 293 | 294 | room = self.room_grid[j][i] 295 | 296 | # Find a position that is not right in front of an object 297 | while True: 298 | super().place_agent(room.top, room.size, rand_dir, max_tries=1000) 299 | front_cell = self.grid.get(*self.front_pos) 300 | if front_cell is None or front_cell.type is 'wall': 301 | break 302 | 303 | return self.agent_pos 304 | 305 | def connect_all(self, door_colors=COLOR_NAMES, max_itrs=5000): 306 | """ 307 | Make sure that all rooms are reachable by the agent from its 308 | starting position 309 | """ 310 | 311 | start_room = self.room_from_pos(*self.agent_pos) 312 | 313 | added_doors = [] 314 | 315 | def find_reach(): 316 | reach = set() 317 | stack = [start_room] 318 | while len(stack) > 0: 319 | room = stack.pop() 320 | if room in reach: 321 | continue 322 | reach.add(room) 323 | for i in range(0, 4): 324 | if room.doors[i]: 325 | stack.append(room.neighbors[i]) 326 | return reach 327 | 328 | num_itrs = 0 329 | 330 | while True: 331 | # This is to handle rare situations where random sampling produces 332 | # a level that cannot be connected, producing in an infinite loop 333 | if num_itrs > max_itrs: 334 | raise RecursionError('connect_all failed') 335 | num_itrs += 1 336 | 337 | # If all rooms are reachable, stop 338 | reach = find_reach() 339 | if len(reach) == self.num_rows * self.num_cols: 340 | break 341 | 342 | # Pick a random room and door position 343 | i = self._rand_int(0, self.num_cols) 344 | j = self._rand_int(0, self.num_rows) 345 | k = self._rand_int(0, 4) 346 | room = self.get_room(i, j) 347 | 348 | # If there is already a door there, skip 349 | if not room.door_pos[k] or room.doors[k]: 350 | continue 351 | 352 | if room.locked or room.neighbors[k].locked: 353 | continue 354 | 355 | color = self._rand_elem(door_colors) 356 | door, _ = self.add_door(i, j, k, color, False) 357 | added_doors.append(door) 358 | 359 | 
return added_doors 360 | 361 | def add_distractors(self, i=None, j=None, num_distractors=10, all_unique=True): 362 | """ 363 | Add random objects that can potentially distract/confuse the agent. 364 | """ 365 | 366 | # Collect a list of existing objects 367 | objs = [] 368 | for row in self.room_grid: 369 | for room in row: 370 | for obj in room.objs: 371 | objs.append((obj.type, obj.color)) 372 | 373 | # List of distractors added 374 | dists = [] 375 | 376 | while len(dists) < num_distractors: 377 | color = self._rand_elem(COLOR_NAMES) 378 | type = self._rand_elem(['key', 'ball', 'box']) 379 | obj = (type, color) 380 | 381 | if all_unique and obj in objs: 382 | continue 383 | 384 | # Add the object to a random room if no room specified 385 | room_i = i 386 | room_j = j 387 | if room_i == None: 388 | room_i = self._rand_int(0, self.num_cols) 389 | if room_j == None: 390 | room_j = self._rand_int(0, self.num_rows) 391 | 392 | dist, pos = self.add_object(room_i, room_j, *obj) 393 | 394 | objs.append(obj) 395 | dists.append(dist) 396 | 397 | return dists 398 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Minimalistic Gridworld Environment (MiniGrid) 2 | 3 | [![Build Status](https://travis-ci.org/maximecb/gym-minigrid.svg?branch=master)](https://travis-ci.org/maximecb/gym-minigrid) 4 | 5 | There are other gridworld Gym environments out there, but this one is 6 | designed to be particularly simple, lightweight and fast. The code has very few 7 | dependencies, making it less likely to break or fail to install. It loads no 8 | external sprites/textures, and it can run at up to 5000 FPS on a Core i7 9 | laptop, which means you can run your experiments faster. A known-working RL 10 | implementation can be found [in this repository](https://github.com/lcswillems/torch-rl). 
11 | 12 | Requirements: 13 | - Python 3.5+ 14 | - OpenAI Gym 15 | - NumPy 16 | - PyQT 5 for graphics 17 | 18 | Please use this bibtex if you want to cite this repository in your publications: 19 | 20 | ``` 21 | @misc{gym_minigrid, 22 | author = {Chevalier-Boisvert, Maxime and Willems, Lucas and Pal, Suman}, 23 | title = {Minimalistic Gridworld Environment for OpenAI Gym}, 24 | year = {2018}, 25 | publisher = {GitHub}, 26 | journal = {GitHub repository}, 27 | howpublished = {\url{https://github.com/maximecb/gym-minigrid}}, 28 | } 29 | ``` 30 | 31 | List of publications & submissions using MiniGrid (please open a pull request to add missing entries): 32 | - [Learning Effective Subgoals with Multi-Task Hierarchical Reinforcement Learning](http://surl.tirl.info/proceedings/SURL-2019_paper_10.pdf) (Tsinghua University, August 2019) 33 | - [Learning distant cause and effect using only local and immediate credit assignment](https://arxiv.org/abs/1905.11589) (Incubator 491, May 2019) 34 | - [Learning World Graphs to Accelerate Hierarchical Reinforcement Learning](https://arxiv.org/abs/1907.00664) (Salesforce Research, 2019) 35 | - [Modeling the Long Term Future in Model-Based Reinforcement Learning](https://openreview.net/forum?id=SkgQBn0cF7) (Mila, ICLR 2019) 36 | - [Practical Open-Loop Optimistic Planning](https://arxiv.org/pdf/1904.04700.pdf) (INRIA, Apr 2019) 37 | - [Unifying Ensemble Methods for Q-learning via Social Choice Theory](https://arxiv.org/pdf/1902.10646.pdf) (Max Planck Institute, Feb 2019) 38 | - [Planning Beyond The Sensing Horizon Using a Learned Context](https://personalrobotics.cs.washington.edu/workshops/mlmp2018/assets/docs/18_CameraReadySubmission.pdf) (MLMP@IROS, 2018) 39 | - [Guiding Policies with Language via Meta-Learning](https://arxiv.org/abs/1811.07882) (UC Berkeley, Nov 2018) 40 | - [On the Complexity of Exploration in Goal-Driven Navigation](https://arxiv.org/abs/1811.06889) (CMU, NIPS, Nov 2018) 41 | - [Transfer and Exploration via the 
Information Bottleneck](https://openreview.net/forum?id=rJg8yhAqKm) (Mila, Nov 2018) 42 | - [Modeling the Long Term Future in Model-Based Reinforcement Learning](https://openreview.net/forum?id=SkgQBn0cF7) (Nov 2018) 43 | - [Learning of Sophisticated Curriculums by viewing them as Graphs over Tasks](https://openreview.net/forum?id=rJlGdsC9Ym) (ICLR, Nov 2018, withdrawn) 44 | - [BabyAI: First Steps Towards Grounded Language Learning With a Human In the Loop](https://arxiv.org/abs/1810.08272) (Mila, Oct 2018) 45 | 46 | This environment has been built as part of work done at the [MILA](https://mila.quebec/en/). The Dynamic obstacles environment has been added as part of work done at [IAS in TU Darmstadt](https://www.ias.informatik.tu-darmstadt.de/) and the University of Genoa for mobile robot navigation with dynamic obstacles. 47 | 48 | ## Installation 49 | 50 | There is now a [pip package](https://pypi.org/project/gym-minigrid/) available, which is updated periodically: 51 | 52 | ``` 53 | pip3 install gym-minigrid 54 | ``` 55 | 56 | Alternatively, to get the latest version of MiniGrid, you can clone this repository and install the dependencies with `pip3`: 57 | 58 | ``` 59 | git clone https://github.com/maximecb/gym-minigrid.git 60 | cd gym-minigrid 61 | pip3 install -e . 62 | ``` 63 | 64 | ## Basic Usage 65 | 66 | There is a UI application which allows you to manually control the agent with the arrow keys: 67 | 68 | ``` 69 | ./manual_control.py 70 | ``` 71 | 72 | The environment being run can be selected with the `--env-name` option, eg: 73 | 74 | ``` 75 | ./manual_control.py --env-name MiniGrid-Empty-8x8-v0 76 | ``` 77 | 78 | ## Reinforcement Learning 79 | 80 | If you want to train an agent with reinforcement learning, I recommend using the code found in the [torch-rl](https://github.com/lcswillems/torch-rl) repository. This code has been tested and is known to work with this environment. The default hyper-parameters are also known to converge. 
81 | 82 | A sample training command is: 83 | 84 | ``` 85 | cd torch-rl 86 | python3 -m scripts.train --env MiniGrid-Empty-8x8-v0 --algo ppo 87 | ``` 88 | 89 | ## Design 90 | 91 | MiniGrid is built to support tasks involving natural language and sparse rewards. 92 | The observations are dictionaries, with an 'image' field, partially observable 93 | view of the environment, a 'mission' field which is a textual string 94 | describing the objective the agent should reach to get a reward, and a 'direction' 95 | field which can be used as an optional compass. Using dictionaries makes it 96 | easy for you to add additional information to observations 97 | if you need to, without having to force everything into a single tensor. 98 | If your RL code expects one single tensor for observations, please take a look at 99 | `FlatObsWrapper` in 100 | [gym_minigrid/wrappers.py](/gym_minigrid/wrappers.py). 101 | 102 | The partially observable view of the environment uses a compact and efficient 103 | encoding, with just 3 input values per visible grid cell, 7x7x3 values total. 104 | If you want to obtain an array of RGB pixels instead, see the `get_obs_render` method in 105 | [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py). 
106 | 107 | Structure of the world: 108 | - The world is an NxM grid of tiles 109 | - Each tile in the grid world contains zero or one object 110 | - Cells that do not contain an object have the value `None` 111 | - Each object has an associated discrete color (string) 112 | - Each object has an associated type (string) 113 | - Provided object types are: wall, floor, lava, door, key, ball, box and goal 114 | - The agent can pick up and carry exactly one object (eg: ball or key) 115 | - To open a locked door, the agent has to be carrying a key matching the door's color 116 | 117 | Actions in the basic environment: 118 | - Turn left 119 | - Turn right 120 | - Move forward 121 | - Pick up an object 122 | - Drop the object being carried 123 | - Toggle (open doors, interact with objects) 124 | - Done (task completed, optional) 125 | 126 | By default, sparse rewards are given for reaching a green goal tile. A 127 | reward of 1 is given for success, and zero for failure. There is also an 128 | environment-specific time step limit for completing the task. 129 | You can define your own reward function by creating a class derived 130 | from `MiniGridEnv`. Extending the environment with new object types or action 131 | should be very easy. If you wish to do this, you should take a look at the 132 | [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py) source file. 133 | 134 | ## Included Environments 135 | 136 | The environments listed below are implemented in the [gym_minigrid/envs](/gym_minigrid/envs) directory. 137 | Each environment provides one or more configurations registered with OpenAI gym. Each environment 138 | is also programmatically tunable in terms of size/complexity, which is useful for curriculum learning 139 | or to fine-tune difficulty. 
140 | 141 | ### Empty environment 142 | 143 | Registered configurations: 144 | - `MiniGrid-Empty-5x5-v0` 145 | - `MiniGrid-Empty-Random-5x5-v0` 146 | - `MiniGrid-Empty-6x6-v0` 147 | - `MiniGrid-Empty-Random-6x6-v0` 148 | - `MiniGrid-Empty-8x8-v0` 149 | - `MiniGrid-Empty-16x16-v0` 150 | 151 |

152 | 153 |

154 | 155 | This environment is an empty room, and the goal of the agent is to reach the 156 | green goal square, which provides a sparse reward. A small penalty is 157 | subtracted for the number of steps to reach the goal. This environment is 158 | useful, with small rooms, to validate that your RL algorithm works correctly, 159 | and with large rooms to experiment with sparse rewards and exploration. 160 | The random variants of the environment have the agent starting at a random 161 | position for each episode, while the regular variants have the agent always 162 | starting in the corner opposite to the goal. 163 | 164 | ### Four rooms environment 165 | 166 | Registered configurations: 167 | - `MiniGrid-FourRooms-v0` 168 | 169 |

170 | 171 |

172 | 173 | Classic four room reinforcement learning environment. The agent must navigate 174 | in a maze composed of four rooms interconnected by 4 gaps in the walls. To 175 | obtain a reward, the agent must reach the green goal square. Both the agent 176 | and the goal square are randomly placed in any of the four rooms. 177 | 178 | ### Door & key environment 179 | 180 | Registered configurations: 181 | - `MiniGrid-DoorKey-5x5-v0` 182 | - `MiniGrid-DoorKey-6x6-v0` 183 | - `MiniGrid-DoorKey-8x8-v0` 184 | - `MiniGrid-DoorKey-16x16-v0` 185 | 186 |

187 | 188 |

189 | 190 | This environment has a key that the agent must pick up in order to unlock 191 | a door and then get to the green goal square. This environment is difficult, 192 | because of the sparse reward, to solve using classical RL algorithms. It is 193 | useful to experiment with curiosity or curriculum learning. 194 | 195 | ### Multi-room environment 196 | 197 | Registered configurations: 198 | - `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms) 199 | - `MiniGrid-MultiRoom-N4-S5-v0` (four rooms) 200 | - `MiniGrid-MultiRoom-N6-v0` (six rooms) 201 | 202 |

203 | 204 |

205 | 206 | This environment has a series of connected rooms with doors that must be 207 | opened in order to get to the next room. The final room has the green goal 208 | square the agent must get to. This environment is extremely difficult to 209 | solve using RL alone. However, by gradually increasing the number of 210 | rooms and building a curriculum, the environment can be solved. 211 | 212 | ### Fetch environment 213 | 214 | Registered configurations: 215 | - `MiniGrid-Fetch-5x5-N2-v0` 216 | - `MiniGrid-Fetch-6x6-N2-v0` 217 | - `MiniGrid-Fetch-8x8-N3-v0` 218 | 219 |

220 | 221 |

222 | 223 | This environment has multiple objects of assorted types and colors. The 224 | agent receives a textual string as part of its observation telling it 225 | which object to pick up. Picking up the wrong object produces a negative 226 | reward. 227 | 228 | ### Go-to-door environment 229 | 230 | Registered configurations: 231 | - `MiniGrid-GoToDoor-5x5-v0` 232 | - `MiniGrid-GoToDoor-6x6-v0` 233 | - `MiniGrid-GoToDoor-8x8-v0` 234 | 235 |

236 | 237 |

238 | 239 | This environment is a room with four doors, one on each wall. The agent 240 | receives a textual (mission) string as input, telling it which door to go to, 241 | (eg: "go to the red door"). It receives a positive reward for performing the 242 | `done` action next to the correct door, as indicated in the mission string. 243 | 244 | ### Put-near environment 245 | 246 | Registered configurations: 247 | - `MiniGrid-PutNear-6x6-N2-v0` 248 | - `MiniGrid-PutNear-8x8-N3-v0` 249 | 250 | The agent is instructed through a textual string to pick up an object and 251 | place it next to another object. This environment is easy to solve with two 252 | objects, but difficult to solve with more, as it involves both textual 253 | understanding and spatial reasoning involving multiple objects. 254 | 255 | ### Red and blue doors environment 256 | 257 | Registered configurations: 258 | - `MiniGrid-RedBlueDoors-6x6-v0` 259 | - `MiniGrid-RedBlueDoors-8x8-v0` 260 | 261 | The purpose of this environment is to test memory. 262 | The agent is randomly placed within a room with one red and one blue door 263 | facing opposite directions. The agent has to open the red door and then open 264 | the blue door, in that order. The agent, when facing one door, cannot see 265 | the door behind him. Hence, the agent needs to remember whether or not he has 266 | previously opened the other door in order to reliably succeed at completing 267 | the task. 268 | 269 | ### Memory environment 270 | 271 | Registered configurations: 272 | - `MiniGrid-MemoryS17Random-v0` 273 | - `MiniGrid-MemoryS13Random-v0` 274 | - `MiniGrid-MemoryS13-v0` 275 | - `MiniGrid-MemoryS11-v0` 276 | - `MiniGrid-MemoryS9-v0` 277 | - `MiniGrid-MemoryS7-v0` 278 | 279 | This environment is a memory test. The agent starts in a small room 280 | where it sees an object. It then has to go through a narrow hallway 281 | which ends in a split. 
At each end of the split there is an object, 282 | one of which is the same as the object in the starting room. The 283 | agent has to remember the initial object, and go to the matching 284 | object at the split. 285 | 286 | ### Locked room environment 287 | 288 | Registered configurations: 289 | - `MiniGrid-LockedRoom-v0` 290 | 291 | The environment has six rooms, one of which is locked. The agent receives 292 | a textual mission string as input, telling it which room to go to in order 293 | to get the key that opens the locked room. It then has to go into the locked 294 | room in order to reach the final goal. This environment is extremely difficult 295 | to solve with vanilla reinforcement learning alone. 296 | 297 | ### Key corridor environment 298 | 299 | Registered configurations: 300 | - `MiniGrid-KeyCorridorS3R1-v0` 301 | - `MiniGrid-KeyCorridorS3R2-v0` 302 | - `MiniGrid-KeyCorridorS3R3-v0` 303 | - `MiniGrid-KeyCorridorS4R3-v0` 304 | - `MiniGrid-KeyCorridorS5R3-v0` 305 | - `MiniGrid-KeyCorridorS6R3-v0` 306 | 307 |

308 | 309 | 310 | 311 | 312 | 313 | 314 |

315 | 316 | This environment is similar to the locked room environment, but there are 317 | multiple registered environment configurations of increasing size, 318 | making it easier to use curriculum learning to train an agent to solve it. 319 | The agent has to pick up an object which is behind a locked door. The key is 320 | hidden in another room, and the agent has to explore the environment to find 321 | it. The mission string does not give the agent any clues as to where the 322 | key is placed. This environment can be solved without relying on language. 323 | 324 | ### Unlock environment 325 | 326 | Registered configurations: 327 | - `MiniGrid-Unlock-v0` 328 | 329 |

330 | 331 |

332 | 333 | The agent has to open a locked door. This environment can be solved without 334 | relying on language. 335 | 336 | ### Unlock pickup environment 337 | 338 | Registered configurations: 339 | - `MiniGrid-UnlockPickup-v0` 340 | 341 |

342 | 343 |

344 | 345 | The agent has to pick up a box which is placed in another room, behind a 346 | locked door. This environment can be solved without relying on language. 347 | 348 | ### Blocked unlock pickup environment 349 | 350 | Registered configurations: 351 | - `MiniGrid-BlockedUnlockPickup-v0` 352 | 353 |

354 | 355 |

356 | 357 | The agent has to pick up a box which is placed in another room, behind a 358 | locked door. The door is also blocked by a ball which the agent has to move 359 | before it can unlock the door. Hence, the agent has to learn to move the ball, 360 | pick up the key, open the door and pick up the object in the other room. 361 | This environment can be solved without relying on language. 362 | 363 | ### Obstructed maze environment 364 | 365 | Registered configurations: 366 | - `MiniGrid-ObstructedMaze-1Dl-v0` 367 | - `MiniGrid-ObstructedMaze-1Dlh-v0` 368 | - `MiniGrid-ObstructedMaze-1Dlhb-v0` 369 | - `MiniGrid-ObstructedMaze-2Dl-v0` 370 | - `MiniGrid-ObstructedMaze-2Dlh-v0` 371 | - `MiniGrid-ObstructedMaze-2Dlhb-v0` 372 | - `MiniGrid-ObstructedMaze-1Q-v0` 373 | - `MiniGrid-ObstructedMaze-2Q-v0` 374 | - `MiniGrid-ObstructedMaze-Full-v0` 375 | 376 |

377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 |

387 | 388 | The agent has to pick up a box which is placed in a corner of a 3x3 maze. 389 | The doors are locked, the keys are hidden in boxes and doors are obstructed 390 | by balls. This environment can be solved without relying on language. 391 | 392 | The agent has to pick up a box which is placed in a corner of a 3x3 maze. 393 | The doors are locked, the keys are hidden in boxes and doors are obstructed 394 | by balls. This environment can be solved without relying on language. 395 | 396 | ### Lava crossing environment 397 | 398 | Registered configurations: 399 | - `MiniGrid-LavaCrossingS9N1-v0` 400 | - `MiniGrid-LavaCrossingS9N2-v0` 401 | - `MiniGrid-LavaCrossingS9N3-v0` 402 | - `MiniGrid-LavaCrossingS11N5-v0` 403 | 404 |

405 | 406 | 407 | 408 | 409 |

410 | 411 | The agent has to reach the green goal square on the other corner of the room 412 | while avoiding rivers of deadly lava which terminate the episode in failure. 413 | Each lava stream runs across the room either horizontally or vertically, and 414 | has a single crossing point which can be safely used; luckily, a path to the 415 | goal is guaranteed to exist. This environment is useful for studying safety and 416 | safe exploration. 417 | 418 | ### Distributional shift environment 419 | 420 | Registered configurations: 421 | - `MiniGrid-DistShift1-v0` 422 | - `MiniGrid-DistShift2-v0` 423 | 424 | This environment is based on one of the DeepMind [AI safety gridworlds](https://github.com/deepmind/ai-safety-gridworlds). 425 | The agent starts in the top-left corner and must reach the goal which is in the top-right corner, but has to avoid stepping 426 | into lava on its way. The aim of this environment is to test an agent's ability to generalize. There are two slightly 427 | different variants of the environment, so that the agent can be trained on one variant and tested on the other. 428 | 429 |

430 | 431 | 432 |

433 | 434 | ## Simple crossing environment 435 | 436 | Registered configurations: 437 | - `MiniGrid-SimpleCrossingS9N1-v0` 438 | - `MiniGrid-SimpleCrossingS9N2-v0` 439 | - `MiniGrid-SimpleCrossingS9N3-v0` 440 | - `MiniGrid-SimpleCrossingS11N5-v0` 441 | 442 |

443 | 444 | 445 | 446 | 447 |

448 | 449 | Similar to the `LavaCrossing` environment, the agent has to reach the green 450 | goal square on the other corner of the room; however, lava is replaced by 451 | walls. This MDP is therefore much easier and may be useful for quickly 452 | testing your algorithms. 453 | 454 | ### Dynamic obstacles environment 455 | 456 | Registered configurations: 457 | - `MiniGrid-Dynamic-Obstacles-5x5-v0` 458 | - `MiniGrid-Dynamic-Obstacles-Random-5x5-v0` 459 | - `MiniGrid-Dynamic-Obstacles-6x6-v0` 460 | - `MiniGrid-Dynamic-Obstacles-Random-6x6-v0` 461 | - `MiniGrid-Dynamic-Obstacles-8x8-v0` 462 | - `MiniGrid-Dynamic-Obstacles-16x16-v0` 463 | 464 |

465 | 466 |

# Edge length, in pixels, of one grid cell in the full-scale human view
CELL_PIXELS = 32

# RGB value of every named color
COLORS = {
    'red'   : np.array([255, 0, 0]),
    'green' : np.array([0, 255, 0]),
    'blue'  : np.array([0, 0, 255]),
    'purple': np.array([112, 39, 195]),
    'yellow': np.array([255, 255, 0]),
    'grey'  : np.array([100, 100, 100]),
}

# Color names in alphabetical order
COLOR_NAMES = sorted(COLORS)

# Integer code of every color name (the code is the position in the tuple)
COLOR_TO_IDX = {
    name: idx
    for idx, name in enumerate(
        ('red', 'green', 'blue', 'purple', 'yellow', 'grey')
    )
}

# Reverse lookup: integer code -> color name
IDX_TO_COLOR = {idx: name for name, idx in COLOR_TO_IDX.items()}

# Integer code of every object type (the code is the position in the tuple)
OBJECT_TO_IDX = {
    name: idx
    for idx, name in enumerate((
        'unseen',
        'empty',
        'wall',
        'floor',
        'door',
        'key',
        'ball',
        'box',
        'goal',
        'lava',
        'agent',
    ))
}

# Reverse lookup: integer code -> object type
IDX_TO_OBJECT = {idx: name for name, idx in OBJECT_TO_IDX.items()}

# Unit vector for each agent direction index:
# 0 = right (+X), 1 = down (+Y), 2 = left (-X), 3 = up (-Y)
DIR_TO_VEC = [
    np.array(step)
    for step in ((1, 0), (0, 1), (-1, 0), (0, -1))
]
class WorldObj:
    """
    Common base of everything that can occupy a grid cell
    """

    def __init__(self, type, color):
        # Reject unknown object types and colors right away
        assert type in OBJECT_TO_IDX, type
        assert color in COLOR_TO_IDX, color

        self.type = type
        self.color = color

        # Object held inside this one (None unless a subclass sets it)
        self.contains = None

        # Position the object was first placed at
        self.init_pos = None

        # Position the object is at right now
        self.cur_pos = None

    def can_overlap(self):
        """May the agent stand on the same cell as this object?"""
        return False

    def can_pickup(self):
        """May the agent pick this object up?"""
        return False

    def can_contain(self):
        """May this object hold another object?"""
        return False

    def see_behind(self):
        """Is the agent's line of sight unobstructed by this object?"""
        return True

    def toggle(self, env, pos):
        """Trigger this object's action; the base object has none"""
        return False

    def render(self, r):
        """Draw this object with the given renderer (subclasses implement)"""
        raise NotImplementedError

    def _set_color(self, r):
        """Make this object's color the active line and fill color"""
        red, green, blue = COLORS[self.color]
        r.setLineColor(red, green, blue)
        r.setColor(red, green, blue)

class Goal(WorldObj):
    """
    Green goal tile; the agent can step onto it
    """

    def __init__(self):
        super().__init__('goal', 'green')

    def can_overlap(self):
        return True

    def render(self, r):
        # Fill the whole cell with the goal color
        self._set_color(r)
        full_cell = [
            (0          , CELL_PIXELS),
            (CELL_PIXELS, CELL_PIXELS),
            (CELL_PIXELS,           0),
            (0          ,           0)
        ]
        r.drawPolygon(full_cell)

class Floor(WorldObj):
    """
    Colored floor tile the agent can walk over
    """

    def __init__(self, color='blue'):
        super().__init__('floor', color)

    def can_overlap(self):
        return True

    def render(self, r):
        r.setLineColor(100, 100, 100, 0)

        # Halve every channel to give the floor a pale color
        pale = COLORS[self.color] / 2
        r.setColor(*pale)

        # Inset by one pixel on the top/left edges
        tile = [
            (1          , CELL_PIXELS),
            (CELL_PIXELS, CELL_PIXELS),
            (CELL_PIXELS,           1),
            (1          ,           1)
        ]
        r.drawPolygon(tile)
class Lava(WorldObj):
    """
    Lava tile; the agent can step onto it
    """

    def __init__(self):
        super().__init__('lava', 'red')

    def can_overlap(self):
        return True

    def render(self, r):
        # Orange background covering the whole cell
        orange = (255, 128, 0)
        r.setLineColor(*orange)
        r.setColor(*orange)
        r.drawPolygon([
            (0          , CELL_PIXELS),
            (CELL_PIXELS, CELL_PIXELS),
            (CELL_PIXELS,           0),
            (0          ,           0)
        ])

        # Three black zig-zag waves, each alternating between a crest
        # and a trough height at five fixed x positions
        r.setLineColor(0, 0, 0)
        wave_xs = (.1, .3, .5, .7, .9)
        for crest, trough in ((.3, .4), (.5, .6), (.7, .8)):
            wave_ys = (crest, trough, crest, trough, crest)
            r.drawPolyline([
                (x * CELL_PIXELS, y * CELL_PIXELS)
                for x, y in zip(wave_xs, wave_ys)
            ])

class Wall(WorldObj):
    """
    Solid wall tile that blocks the agent's line of sight
    """

    def __init__(self, color='grey'):
        super().__init__('wall', color)

    def see_behind(self):
        return False

    def render(self, r):
        # Fill the whole cell with the wall color
        self._set_color(r)
        full_cell = [
            (0          , CELL_PIXELS),
            (CELL_PIXELS, CELL_PIXELS),
            (CELL_PIXELS,           0),
            (0          ,           0)
        ]
        r.drawPolygon(full_cell)
class Door(WorldObj):
    """
    Door that can be open, closed, or locked
    """

    def __init__(self, color, is_open=False, is_locked=False):
        super().__init__('door', color)
        self.is_open = is_open
        self.is_locked = is_locked

    def can_overlap(self):
        """The agent can only walk over this cell when the door is open"""
        return self.is_open

    def see_behind(self):
        return self.is_open

    def toggle(self, env, pos):
        # An unlocked door simply flips between open and closed
        if not self.is_locked:
            self.is_open = not self.is_open
            return True

        # A locked door only opens for a carried key of the same color
        held = env.carrying
        if isinstance(held, Key) and held.color == self.color:
            self.is_locked = False
            self.is_open = True
            return True

        return False

    def render(self, r):
        red, green, blue = COLORS[self.color]
        alpha = 50 if self.is_locked else 0
        r.setLineColor(red, green, blue)
        r.setColor(red, green, blue, alpha)

        # An open door is drawn as a thin strip on the right edge
        if self.is_open:
            r.drawPolygon([
                (CELL_PIXELS-2, CELL_PIXELS),
                (CELL_PIXELS  , CELL_PIXELS),
                (CELL_PIXELS  ,           0),
                (CELL_PIXELS-2,           0)
            ])
            return

        # Closed door: outer frame, then an inner panel inset by 2 pixels
        r.drawPolygon([
            (0          , CELL_PIXELS),
            (CELL_PIXELS, CELL_PIXELS),
            (CELL_PIXELS,           0),
            (0          ,           0)
        ])
        r.drawPolygon([
            (2            , CELL_PIXELS-2),
            (CELL_PIXELS-2, CELL_PIXELS-2),
            (CELL_PIXELS-2,             2),
            (2            ,             2)
        ])

        if self.is_locked:
            # Draw key slot
            r.drawLine(
                CELL_PIXELS * 0.55,
                CELL_PIXELS * 0.5,
                CELL_PIXELS * 0.75,
                CELL_PIXELS * 0.5
            )
        else:
            # Draw door handle
            r.drawCircle(CELL_PIXELS * 0.75, CELL_PIXELS * 0.5, 2)

class Key(WorldObj):
    """
    Key the agent can pick up
    """

    def __init__(self, color='blue'):
        super().__init__('key', color)

    def can_pickup(self):
        return True

    def render(self, r):
        self._set_color(r)

        # Shaft (vertical quad)
        shaft = [(16, 10), (20, 10), (20, 28), (16, 28)]
        r.drawPolygon(shaft)

        # Two teeth sticking out to the left of the shaft
        upper_tooth = [(12, 19), (16, 19), (16, 21), (12, 21)]
        lower_tooth = [(12, 26), (16, 26), (16, 28), (12, 28)]
        r.drawPolygon(upper_tooth)
        r.drawPolygon(lower_tooth)

        # Ring: colored disc with a smaller black disc on top
        r.drawCircle(18, 9, 6)
        r.setLineColor(0, 0, 0)
        r.setColor(0, 0, 0)
        r.drawCircle(18, 9, 2)
class Ball(WorldObj):
    """
    Ball the agent can pick up
    """

    def __init__(self, color='blue'):
        super().__init__('ball', color)

    def can_pickup(self):
        return True

    def render(self, r):
        # Filled disc centered in the cell
        self._set_color(r)
        center = CELL_PIXELS * 0.5
        r.drawCircle(center, center, 10)

class Box(WorldObj):
    """
    Box the agent can pick up; it may hold another object
    """

    def __init__(self, color, contains=None):
        super().__init__('box', color)
        self.contains = contains

    def can_pickup(self):
        return True

    def render(self, r):
        red, green, blue = COLORS[self.color]
        r.setLineColor(red, green, blue)
        r.setColor(0, 0, 0)
        r.setLineWidth(2)

        # Black square with a colored outline, inset by 4 pixels
        r.drawPolygon([
            (4            , CELL_PIXELS-4),
            (CELL_PIXELS-4, CELL_PIXELS-4),
            (CELL_PIXELS-4,             4),
            (4            ,             4)
        ])

        # Horizontal line across the middle of the box
        mid_y = CELL_PIXELS / 2
        r.drawLine(4, mid_y, CELL_PIXELS - 4, mid_y)

        # Restore the line width for whatever is drawn next
        r.setLineWidth(1)

    def toggle(self, env, pos):
        # Replace the box by its contents
        env.grid.set(*pos, self.contains)
        return True
class Grid:
    """
    Represent a grid and operations on it

    Cells are kept in a flat row-major list; every entry is either None
    (empty cell) or a world object.
    """

    def __init__(self, width, height):
        assert width >= 3
        assert height >= 3

        self.width = width
        self.height = height

        # Cell (i, j) is stored at index j * width + i
        self.grid = [None] * width * height

    def __contains__(self, key):
        """
        Membership test accepting either a world object (matched by
        identity) or a (color, type) tuple; a color of None matches any
        color.
        """

        if isinstance(key, WorldObj):
            return any(e is key for e in self.grid)

        if isinstance(key, tuple):
            for e in self.grid:
                if e is None:
                    continue
                if (e.color, e.type) == key:
                    return True
                if key[0] is None and key[1] == e.type:
                    return True

        return False

    def __eq__(self, other):
        """Two grids are equal when their encodings are identical"""
        grid1 = self.encode()
        grid2 = other.encode()
        return np.array_equal(grid2, grid1)

    def __ne__(self, other):
        return not self == other

    def copy(self):
        """Return a deep copy of this grid"""
        from copy import deepcopy
        return deepcopy(self)

    def set(self, i, j, v):
        """Store v (an object or None) in cell (i, j)"""
        assert i >= 0 and i < self.width
        assert j >= 0 and j < self.height
        self.grid[j * self.width + i] = v

    def get(self, i, j):
        """Return the content of cell (i, j), None when empty"""
        assert i >= 0 and i < self.width
        assert j >= 0 and j < self.height
        return self.grid[j * self.width + i]

    def horz_wall(self, x, y, length=None):
        """
        Lay walls rightwards from (x, y); by default up to the grid edge
        """

        if length is None:
            length = self.width - x
        for i in range(0, length):
            self.set(x + i, y, Wall())

    def vert_wall(self, x, y, length=None):
        """
        Lay walls downwards from (x, y); by default up to the grid edge
        """

        if length is None:
            length = self.height - y
        for j in range(0, length):
            self.set(x, y + j, Wall())

    def wall_rect(self, x, y, w, h):
        """Draw the outline of a w-by-h rectangle of walls at (x, y)"""
        self.horz_wall(x, y, w)
        self.horz_wall(x, y+h-1, w)
        self.vert_wall(x, y, h)
        self.vert_wall(x+w-1, y, h)

    def rotate_left(self):
        """
        Rotate the grid to the left (counter-clockwise)

        Returns a new grid with swapped dimensions; cell contents are
        shared with this grid, not copied.
        """

        grid = Grid(self.height, self.width)

        for i in range(self.width):
            for j in range(self.height):
                v = self.get(i, j)
                grid.set(j, grid.height - 1 - i, v)

        return grid

    def slice(self, topX, topY, width, height):
        """
        Get a subset of the grid

        Positions that fall outside of this grid are filled with walls.
        """

        grid = Grid(width, height)

        for j in range(0, height):
            for i in range(0, width):
                x = topX + i
                y = topY + j

                if x >= 0 and x < self.width and \
                   y >= 0 and y < self.height:
                    v = self.get(x, y)
                else:
                    v = Wall()

                grid.set(i, j, v)

        return grid

    def render(self, r, tile_size):
        """
        Render this grid at a given scale
        :param r: target renderer object
        :param tile_size: tile size in pixels
        """

        assert r.width == self.width * tile_size
        assert r.height == self.height * tile_size

        # Total grid size at native scale
        widthPx = self.width * CELL_PIXELS
        heightPx = self.height * CELL_PIXELS

        r.push()

        # Internally, we draw at the "large" full-grid resolution, but we
        # use the renderer to scale back to the desired size
        r.scale(tile_size / CELL_PIXELS, tile_size / CELL_PIXELS)

        # Draw the background of the in-world cells black
        r.fillRect(
            0,
            0,
            widthPx,
            heightPx,
            0, 0, 0
        )

        # Draw grid lines
        r.setLineColor(100, 100, 100)
        for rowIdx in range(0, self.height):
            y = CELL_PIXELS * rowIdx
            r.drawLine(0, y, widthPx, y)
        for colIdx in range(0, self.width):
            x = CELL_PIXELS * colIdx
            r.drawLine(x, 0, x, heightPx)

        # Render the grid
        for j in range(0, self.height):
            for i in range(0, self.width):
                cell = self.get(i, j)
                # Identity test: an empty cell is exactly None
                if cell is None:
                    continue
                r.push()
                r.translate(i * CELL_PIXELS, j * CELL_PIXELS)
                cell.render(r)
                r.pop()

        r.pop()

    def encode(self, vis_mask=None):
        """
        Produce a compact numpy encoding of the grid

        :param vis_mask: boolean (width, height) array; cells whose entry
            is False are left as zeros. Defaults to everything visible.
        :return: uint8 array of shape (width, height, 3) holding the
            object type index, color index and state of each cell
        """

        if vis_mask is None:
            vis_mask = np.ones((self.width, self.height), dtype=bool)

        array = np.zeros((self.width, self.height, 3), dtype='uint8')
        for i in range(self.width):
            for j in range(self.height):
                if vis_mask[i, j]:
                    v = self.get(i, j)

                    if v is None:
                        array[i, j, 0] = OBJECT_TO_IDX['empty']
                        array[i, j, 1] = 0
                        array[i, j, 2] = 0
                    else:
                        # State, 0: open, 1: closed, 2: locked
                        state = 0
                        if hasattr(v, 'is_open') and not v.is_open:
                            state = 1
                        if hasattr(v, 'is_locked') and v.is_locked:
                            state = 2

                        array[i, j, 0] = OBJECT_TO_IDX[v.type]
                        array[i, j, 1] = COLOR_TO_IDX[v.color]
                        array[i, j, 2] = state

        return array

    @staticmethod
    def decode(array):
        """
        Decode an array grid encoding back into a grid

        The encoding does not store what a box contains, so decoded
        boxes are created without contents.
        """

        width, height, channels = array.shape
        assert channels == 3

        grid = Grid(width, height)
        for i in range(width):
            for j in range(height):
                typeIdx, colorIdx, state = array[i, j]

                if typeIdx == OBJECT_TO_IDX['unseen'] or \
                   typeIdx == OBJECT_TO_IDX['empty']:
                    continue

                objType = IDX_TO_OBJECT[typeIdx]
                color = IDX_TO_COLOR[colorIdx]
                # State, 0: open, 1: closed, 2: locked
                is_open = state == 0
                is_locked = state == 2

                if objType == 'wall':
                    v = Wall(color)
                elif objType == 'floor':
                    v = Floor(color)
                elif objType == 'ball':
                    v = Ball(color)
                elif objType == 'key':
                    v = Key(color)
                elif objType == 'box':
                    v = Box(color)
                elif objType == 'door':
                    v = Door(color, is_open, is_locked)
                elif objType == 'goal':
                    v = Goal()
                elif objType == 'lava':
                    v = Lava()
                else:
                    assert False, "unknown obj type in decode '%s'" % objType

                grid.set(i, j, v)

        return grid

    def process_vis(grid, agent_pos):
        """
        Compute which cells are visible from agent_pos and blank the rest

        Visibility spreads from the agent's cell row by row, from the
        last row towards row 0, and is stopped by objects whose
        see_behind() is False. Cells that end up not visible are set to
        None in this grid.

        :param agent_pos: (x, y) position of the agent in this grid
        :return: boolean (width, height) visibility mask
        """

        # The builtin bool is used as dtype: the np.bool alias no longer
        # exists in recent NumPy releases
        mask = np.zeros(shape=(grid.width, grid.height), dtype=bool)

        mask[agent_pos[0], agent_pos[1]] = True

        for j in reversed(range(0, grid.height)):
            # Left-to-right sweep: spread right and to the row above
            for i in range(0, grid.width-1):
                if not mask[i, j]:
                    continue

                cell = grid.get(i, j)
                if cell and not cell.see_behind():
                    continue

                mask[i+1, j] = True
                if j > 0:
                    mask[i+1, j-1] = True
                    mask[i, j-1] = True

            # Right-to-left sweep: spread left and to the row above
            for i in reversed(range(1, grid.width)):
                if not mask[i, j]:
                    continue

                cell = grid.get(i, j)
                if cell and not cell.see_behind():
                    continue

                mask[i-1, j] = True
                if j > 0:
                    mask[i-1, j-1] = True
                    mask[i, j-1] = True

        # Remove everything the agent cannot see
        for j in range(0, grid.height):
            for i in range(0, grid.width):
                if not mask[i, j]:
                    grid.set(i, j, None)

        return mask
in range(0, grid.height): 627 | for i in range(0, grid.width): 628 | if not mask[i, j]: 629 | grid.set(i, j, None) 630 | 631 | return mask 632 | 633 | class MiniGridEnv(gym.Env): 634 | """ 635 | 2D grid world game environment 636 | """ 637 | 638 | metadata = { 639 | 'render.modes': ['human', 'rgb_array', 'pixmap'], 640 | 'video.frames_per_second' : 10 641 | } 642 | 643 | # Enumeration of possible actions 644 | class Actions(IntEnum): 645 | # Turn left, turn right, move forward 646 | left = 0 647 | right = 1 648 | forward = 2 649 | 650 | # Pick up an object 651 | pickup = 3 652 | # Drop an object 653 | drop = 4 654 | # Toggle/activate an object 655 | toggle = 5 656 | 657 | # Done completing task 658 | done = 6 659 | 660 | def __init__( 661 | self, 662 | grid_size=None, 663 | width=None, 664 | height=None, 665 | max_steps=100, 666 | see_through_walls=False, 667 | seed=1337, 668 | agent_view_size=7 669 | ): 670 | # Can't set both grid_size and width/height 671 | if grid_size: 672 | assert width == None and height == None 673 | width = grid_size 674 | height = grid_size 675 | 676 | # Action enumeration for this environment 677 | self.actions = MiniGridEnv.Actions 678 | 679 | # Actions are discrete integer values 680 | self.action_space = spaces.Discrete(len(self.actions)) 681 | 682 | # Number of cells (width and height) in the agent view 683 | self.agent_view_size = agent_view_size 684 | 685 | # Observations are dictionaries containing an 686 | # encoding of the grid and a textual 'mission' string 687 | self.observation_space = spaces.Box( 688 | low=0, 689 | high=255, 690 | shape=(self.agent_view_size, self.agent_view_size, 3), 691 | dtype='uint8' 692 | ) 693 | self.observation_space = spaces.Dict({ 694 | 'image': self.observation_space 695 | }) 696 | 697 | # Range of possible rewards 698 | self.reward_range = (0, 1) 699 | 700 | # Renderer object used to render the whole grid (full-scale) 701 | self.grid_render = None 702 | 703 | # Renderer used to render observations 
(small-scale agent view) 704 | self.obs_render = None 705 | 706 | # Environment configuration 707 | self.width = width 708 | self.height = height 709 | self.max_steps = max_steps 710 | self.see_through_walls = see_through_walls 711 | 712 | # Current position and direction of the agent 713 | self.agent_pos = None 714 | self.agent_dir = None 715 | 716 | # Initialize the RNG 717 | self.seed(seed=seed) 718 | 719 | # Initialize the state 720 | self.reset() 721 | 722 | def reset(self): 723 | # Current position and direction of the agent 724 | self.agent_pos = None 725 | self.agent_dir = None 726 | 727 | # Generate a new random grid at the start of each episode 728 | # To keep the same grid for each episode, call env.seed() with 729 | # the same seed before calling env.reset() 730 | self._gen_grid(self.width, self.height) 731 | 732 | # These fields should be defined by _gen_grid 733 | assert self.agent_pos is not None 734 | assert self.agent_dir is not None 735 | 736 | # Check that the agent doesn't overlap with an object 737 | start_cell = self.grid.get(*self.agent_pos) 738 | assert start_cell is None or start_cell.can_overlap() 739 | 740 | # Item picked up, being carried, initially nothing 741 | self.carrying = None 742 | 743 | # Step count since episode start 744 | self.step_count = 0 745 | 746 | # Return first observation 747 | obs = self.gen_obs() 748 | return obs 749 | 750 | def seed(self, seed=1337): 751 | # Seed the random number generator 752 | self.np_random, _ = seeding.np_random(seed) 753 | return [seed] 754 | 755 | @property 756 | def steps_remaining(self): 757 | return self.max_steps - self.step_count 758 | 759 | def __str__(self): 760 | """ 761 | Produce a pretty string of the environment's grid along with the agent. 762 | A grid cell is represented by 2-character string, the first one for 763 | the object and the second one for the color. 
764 | """ 765 | 766 | # Map of object types to short string 767 | OBJECT_TO_STR = { 768 | 'wall' : 'W', 769 | 'floor' : 'F', 770 | 'door' : 'D', 771 | 'key' : 'K', 772 | 'ball' : 'A', 773 | 'box' : 'B', 774 | 'goal' : 'G', 775 | 'lava' : 'V', 776 | } 777 | 778 | # Short string for opened door 779 | OPENDED_DOOR_IDS = '_' 780 | 781 | # Map agent's direction to short string 782 | AGENT_DIR_TO_STR = { 783 | 0: '>', 784 | 1: 'V', 785 | 2: '<', 786 | 3: '^' 787 | } 788 | 789 | str = '' 790 | 791 | for j in range(self.grid.height): 792 | 793 | for i in range(self.grid.width): 794 | if i == self.agent_pos[0] and j == self.agent_pos[1]: 795 | str += 2 * AGENT_DIR_TO_STR[self.agent_dir] 796 | continue 797 | 798 | c = self.grid.get(i, j) 799 | 800 | if c == None: 801 | str += ' ' 802 | continue 803 | 804 | if c.type == 'door': 805 | if c.is_open: 806 | str += '__' 807 | elif c.is_locked: 808 | str += 'L' + c.color[0].upper() 809 | else: 810 | str += 'D' + c.color[0].upper() 811 | continue 812 | 813 | str += OBJECT_TO_STR[c.type] + c.color[0].upper() 814 | 815 | if j < self.grid.height - 1: 816 | str += '\n' 817 | 818 | return str 819 | 820 | def _gen_grid(self, width, height): 821 | assert False, "_gen_grid needs to be implemented by each environment" 822 | 823 | def _reward(self): 824 | """ 825 | Compute the reward to be given upon success 826 | """ 827 | 828 | return 1 - 0.9 * (self.step_count / self.max_steps) 829 | 830 | def _rand_int(self, low, high): 831 | """ 832 | Generate random integer in [low,high[ 833 | """ 834 | 835 | return self.np_random.randint(low, high) 836 | 837 | def _rand_float(self, low, high): 838 | """ 839 | Generate random float in [low,high[ 840 | """ 841 | 842 | return self.np_random.uniform(low, high) 843 | 844 | def _rand_bool(self): 845 | """ 846 | Generate random boolean value 847 | """ 848 | 849 | return (self.np_random.randint(0, 2) == 0) 850 | 851 | def _rand_elem(self, iterable): 852 | """ 853 | Pick a random element in a list 854 | """ 855 
| 856 | lst = list(iterable) 857 | idx = self._rand_int(0, len(lst)) 858 | return lst[idx] 859 | 860 | def _rand_subset(self, iterable, num_elems): 861 | """ 862 | Sample a random subset of distinct elements of a list 863 | """ 864 | 865 | lst = list(iterable) 866 | assert num_elems <= len(lst) 867 | 868 | out = [] 869 | 870 | while len(out) < num_elems: 871 | elem = self._rand_elem(lst) 872 | lst.remove(elem) 873 | out.append(elem) 874 | 875 | return out 876 | 877 | def _rand_color(self): 878 | """ 879 | Generate a random color name (string) 880 | """ 881 | 882 | return self._rand_elem(COLOR_NAMES) 883 | 884 | def _rand_pos(self, xLow, xHigh, yLow, yHigh): 885 | """ 886 | Generate a random (x,y) position tuple 887 | """ 888 | 889 | return ( 890 | self.np_random.randint(xLow, xHigh), 891 | self.np_random.randint(yLow, yHigh) 892 | ) 893 | 894 | def place_obj(self, 895 | obj, 896 | top=None, 897 | size=None, 898 | reject_fn=None, 899 | max_tries=math.inf 900 | ): 901 | """ 902 | Place an object at an empty position in the grid 903 | 904 | :param top: top-left position of the rectangle where to place 905 | :param size: size of the rectangle where to place 906 | :param reject_fn: function to filter out potential positions 907 | """ 908 | 909 | if top is None: 910 | top = (0, 0) 911 | else: 912 | top = (max(top[0], 0), max(top[1], 0)) 913 | 914 | if size is None: 915 | size = (self.grid.width, self.grid.height) 916 | 917 | num_tries = 0 918 | 919 | while True: 920 | # This is to handle with rare cases where rejection sampling 921 | # gets stuck in an infinite loop 922 | if num_tries > max_tries: 923 | raise RecursionError('rejection sampling failed in place_obj') 924 | 925 | num_tries += 1 926 | 927 | pos = np.array(( 928 | self._rand_int(top[0], min(top[0] + size[0], self.grid.width)), 929 | self._rand_int(top[1], min(top[1] + size[1], self.grid.height)) 930 | )) 931 | 932 | # Don't place the object on top of another object 933 | if self.grid.get(*pos) != None: 934 | 
continue 935 | 936 | # Don't place the object where the agent is 937 | if np.array_equal(pos, self.agent_pos): 938 | continue 939 | 940 | # Check if there is a filtering criterion 941 | if reject_fn and reject_fn(self, pos): 942 | continue 943 | 944 | break 945 | 946 | self.grid.set(*pos, obj) 947 | 948 | if obj is not None: 949 | obj.init_pos = pos 950 | obj.cur_pos = pos 951 | 952 | return pos 953 | 954 | def place_agent( 955 | self, 956 | top=None, 957 | size=None, 958 | rand_dir=True, 959 | max_tries=math.inf 960 | ): 961 | """ 962 | Set the agent's starting point at an empty position in the grid 963 | """ 964 | 965 | self.agent_pos = None 966 | pos = self.place_obj(None, top, size, max_tries=max_tries) 967 | self.agent_pos = pos 968 | 969 | if rand_dir: 970 | self.agent_dir = self._rand_int(0, 4) 971 | 972 | return pos 973 | 974 | @property 975 | def dir_vec(self): 976 | """ 977 | Get the direction vector for the agent, pointing in the direction 978 | of forward movement. 979 | """ 980 | 981 | assert self.agent_dir >= 0 and self.agent_dir < 4 982 | return DIR_TO_VEC[self.agent_dir] 983 | 984 | @property 985 | def right_vec(self): 986 | """ 987 | Get the vector pointing to the right of the agent. 988 | """ 989 | 990 | dx, dy = self.dir_vec 991 | return np.array((-dy, dx)) 992 | 993 | @property 994 | def front_pos(self): 995 | """ 996 | Get the position of the cell that is right in front of the agent 997 | """ 998 | 999 | return self.agent_pos + self.dir_vec 1000 | 1001 | def get_view_coords(self, i, j): 1002 | """ 1003 | Translate and rotate absolute grid coordinates (i, j) into the 1004 | agent's partially observable view (sub-grid). Note that the resulting 1005 | coordinates may be negative or outside of the agent's view size. 
1006 | """ 1007 | 1008 | ax, ay = self.agent_pos 1009 | dx, dy = self.dir_vec 1010 | rx, ry = self.right_vec 1011 | 1012 | # Compute the absolute coordinates of the top-left view corner 1013 | sz = self.agent_view_size 1014 | hs = self.agent_view_size // 2 1015 | tx = ax + (dx * (sz-1)) - (rx * hs) 1016 | ty = ay + (dy * (sz-1)) - (ry * hs) 1017 | 1018 | lx = i - tx 1019 | ly = j - ty 1020 | 1021 | # Project the coordinates of the object relative to the top-left 1022 | # corner onto the agent's own coordinate system 1023 | vx = (rx*lx + ry*ly) 1024 | vy = -(dx*lx + dy*ly) 1025 | 1026 | return vx, vy 1027 | 1028 | def get_view_exts(self): 1029 | """ 1030 | Get the extents of the square set of tiles visible to the agent 1031 | Note: the bottom extent indices are not included in the set 1032 | """ 1033 | 1034 | # Facing right 1035 | if self.agent_dir == 0: 1036 | topX = self.agent_pos[0] 1037 | topY = self.agent_pos[1] - self.agent_view_size // 2 1038 | # Facing down 1039 | elif self.agent_dir == 1: 1040 | topX = self.agent_pos[0] - self.agent_view_size // 2 1041 | topY = self.agent_pos[1] 1042 | # Facing left 1043 | elif self.agent_dir == 2: 1044 | topX = self.agent_pos[0] - self.agent_view_size + 1 1045 | topY = self.agent_pos[1] - self.agent_view_size // 2 1046 | # Facing up 1047 | elif self.agent_dir == 3: 1048 | topX = self.agent_pos[0] - self.agent_view_size // 2 1049 | topY = self.agent_pos[1] - self.agent_view_size + 1 1050 | else: 1051 | assert False, "invalid agent direction" 1052 | 1053 | botX = topX + self.agent_view_size 1054 | botY = topY + self.agent_view_size 1055 | 1056 | return (topX, topY, botX, botY) 1057 | 1058 | def relative_coords(self, x, y): 1059 | """ 1060 | Check if a grid position belongs to the agent's field of view, and returns the corresponding coordinates 1061 | """ 1062 | 1063 | vx, vy = self.get_view_coords(x, y) 1064 | 1065 | if vx < 0 or vy < 0 or vx >= self.agent_view_size or vy >= self.agent_view_size: 1066 | return None 1067 | 
def in_view(self, x, y):
    """
    Check if a grid position is visible to the agent

    Returns True when relative_coords() yields view coordinates for the
    world position (x, y), and False when it returns None.
    """

    return self.relative_coords(x, y) is not None


def agent_sees(self, x, y):
    """
    Check if a non-empty grid position is visible to the agent

    The world position (x, y) is translated into the agent's view, the
    current observation is decoded, and the object type observed there is
    compared with the object type stored in the world grid.

    Returns False when the position is outside the view, when the world
    cell is empty, or when the observed cell is empty or of another type.
    """

    coordinates = self.relative_coords(x, y)
    if coordinates is None:
        return False
    vx, vy = coordinates

    # An empty world cell can never count as "seen". Checking this first
    # also avoids an AttributeError on `None.type` when the observation
    # shows an object there (gen_obs_grid places the carried object on
    # the agent's own cell of the view).
    world_cell = self.grid.get(x, y)
    if world_cell is None:
        return False

    obs = self.gen_obs()
    obs_grid = Grid.decode(obs['image'])
    obs_cell = obs_grid.get(vx, vy)

    return obs_cell is not None and obs_cell.type == world_cell.type


def step(self, action):
    """
    Apply one action and advance the step counter

    Returns a tuple (obs, reward, done, info):
    - obs is the observation produced by gen_obs() after the action
    - reward is 0 unless the agent moved forward onto a 'goal' cell,
      in which case it is self._reward()
    - done is True after moving onto a 'goal' or 'lava' cell, or once
      step_count reaches max_steps
    - info is always an empty dict

    Raises AssertionError if the action is not one of self.actions.
    """

    self.step_count += 1

    reward = 0
    done = False

    # Get the position in front of the agent
    fwd_pos = self.front_pos

    # Get the contents of the cell in front of the agent
    fwd_cell = self.grid.get(*fwd_pos)

    # Rotate left
    if action == self.actions.left:
        self.agent_dir = (self.agent_dir - 1) % 4

    # Rotate right
    elif action == self.actions.right:
        self.agent_dir = (self.agent_dir + 1) % 4

    # Move forward
    elif action == self.actions.forward:
        if fwd_cell is None or fwd_cell.can_overlap():
            self.agent_pos = fwd_pos
        if fwd_cell is not None and fwd_cell.type == 'goal':
            done = True
            reward = self._reward()
        if fwd_cell is not None and fwd_cell.type == 'lava':
            done = True

    # Pick up an object
    elif action == self.actions.pickup:
        if fwd_cell and fwd_cell.can_pickup():
            if self.carrying is None:
                self.carrying = fwd_cell
                # A carried object is given the position (-1, -1)
                self.carrying.cur_pos = np.array([-1, -1])
                self.grid.set(*fwd_pos, None)

    # Drop an object
    elif action == self.actions.drop:
        if not fwd_cell and self.carrying:
            self.grid.set(*fwd_pos, self.carrying)
            self.carrying.cur_pos = fwd_pos
            self.carrying = None

    # Toggle/activate an object
    elif action == self.actions.toggle:
        if fwd_cell:
            fwd_cell.toggle(self, fwd_pos)

    # Done action (not used by default)
    elif action == self.actions.done:
        pass

    else:
        # Raised explicitly (rather than `assert False`) so that invalid
        # actions are still rejected when Python runs with -O
        raise AssertionError("unknown action")

    if self.step_count >= self.max_steps:
        done = True

    obs = self.gen_obs()

    return obs, reward, done, {}


def gen_obs_grid(self):
    """
    Generate the sub-grid observed by the agent.
    This method also outputs a visibility mask telling us which grid
    cells the agent can actually see.

    Returns a tuple (grid, vis_mask). The grid is the slice of the world
    grid covered by the view, rotated agent_dir + 1 times to the left,
    with the carried object (or None) written at the agent's cell.
    """

    topX, topY, botX, botY = self.get_view_exts()

    grid = self.grid.slice(topX, topY, self.agent_view_size, self.agent_view_size)

    for _ in range(self.agent_dir + 1):
        grid = grid.rotate_left()

    # Process occluders and visibility
    # Note that this incurs some performance cost
    if not self.see_through_walls:
        vis_mask = grid.process_vis(agent_pos=(self.agent_view_size // 2, self.agent_view_size - 1))
    else:
        # Builtin bool: the np.bool alias was removed from NumPy
        vis_mask = np.ones(shape=(grid.width, grid.height), dtype=bool)

    # Make it so the agent sees what it's carrying
    # We do this by placing the carried object at the agent's position
    # in the agent's partially observable view
    agent_pos = grid.width // 2, grid.height - 1
    if self.carrying:
        grid.set(*agent_pos, self.carrying)
    else:
        grid.set(*agent_pos, None)

    return grid, vis_mask


def gen_obs(self):
    """
    Generate the agent's view (partially observable, low-resolution encoding)

    Returns a dict with the keys 'image', 'direction' and 'mission'.
    Raises AssertionError if the environment has no `mission` attribute.
    """

    grid, vis_mask = self.gen_obs_grid()

    # Encode the partially observable view into a numpy array
    image = grid.encode(vis_mask)

    # Explicit raise so the check is not stripped under -O
    if not hasattr(self, 'mission'):
        raise AssertionError("environments must define a textual mission string")

    # Observations are dictionaries containing:
    # - an image (partially observable view of the environment)
    # - the agent's direction/orientation (acting as a compass)
    # - a textual mission string (instructions for the agent)
    obs = {
        'image': image,
        'direction': self.agent_dir,
        'mission': self.mission
    }

    return obs


def _draw_agent_marker(self, r, tile_size, offset_x, offset_y, angle):
    """
    Draw the red agent triangle with renderer r

    The renderer is scaled by tile_size / CELL_PIXELS, translated by
    (offset_x, offset_y) and rotated by angle before the polygon is
    drawn; the transform is restored afterwards with r.pop().
    """

    ratio = tile_size / CELL_PIXELS
    r.push()
    r.scale(ratio, ratio)
    r.translate(offset_x, offset_y)
    r.rotate(angle)
    r.setLineColor(255, 0, 0)
    r.setColor(255, 0, 0)
    r.drawPolygon([
        (-12, 10),
        ( 12, 0),
        (-12, -10)
    ])
    r.pop()


def get_obs_render(self, obs, tile_size=CELL_PIXELS//2, mode='pixmap'):
    """
    Render an agent observation for visualization

    obs is passed directly to Grid.decode(). The renderer is created on
    first use and cached in self.obs_render.

    Returns r.getArray() for mode 'rgb_array', r.getPixmap() for mode
    'pixmap', and the renderer itself for any other mode.
    """

    if self.obs_render is None:
        from gym_minigrid.rendering import Renderer
        self.obs_render = Renderer(
            self.agent_view_size * tile_size,
            self.agent_view_size * tile_size
        )

    r = self.obs_render

    r.beginFrame()

    grid = Grid.decode(obs)

    # Render the whole grid
    grid.render(r, tile_size)

    # Draw the agent at the bottom-center cell of the view, with a fixed
    # rotation of 3 * 90
    self._draw_agent_marker(
        r,
        tile_size,
        CELL_PIXELS * (0.5 + self.agent_view_size // 2),
        CELL_PIXELS * (self.agent_view_size - 0.5),
        3 * 90
    )

    r.endFrame()

    if mode == 'rgb_array':
        return r.getArray()
    elif mode == 'pixmap':
        return r.getPixmap()
    return r


def render(self, mode='human', close=False, highlight=True, tile_size=CELL_PIXELS):
    """
    Render the whole-grid human view

    With close=True the cached renderer (if any) is closed and None is
    returned. Otherwise the grid and the agent are drawn and, when
    highlight is True, every cell flagged in the agent's visibility mask
    is overlaid with a translucent white rectangle.

    Returns r.getArray() for mode 'rgb_array', r.getPixmap() for mode
    'pixmap', and the renderer itself for any other mode.
    """

    if close:
        if self.grid_render:
            self.grid_render.close()
        return

    if self.grid_render is None or self.grid_render.window is None or (self.grid_render.width != self.width * tile_size):
        from gym_minigrid.rendering import Renderer
        self.grid_render = Renderer(
            self.width * tile_size,
            self.height * tile_size,
            mode == 'human'
        )

    r = self.grid_render

    if r.window:
        r.window.setText(self.mission)

    r.beginFrame()

    # Render the whole grid
    self.grid.render(r, tile_size)

    # Draw the agent at the center of its cell, facing agent_dir
    self._draw_agent_marker(
        r,
        tile_size,
        CELL_PIXELS * (self.agent_pos[0] + 0.5),
        CELL_PIXELS * (self.agent_pos[1] + 0.5),
        self.agent_dir * 90
    )

    if highlight:
        # Compute which cells are visible to the agent (only needed
        # when highlighting, since computing the mask has a cost)
        _, vis_mask = self.gen_obs_grid()

        # Compute the absolute coordinates of the top-left corner
        # of the agent's view area
        f_vec = self.dir_vec
        r_vec = self.right_vec
        top_left = self.agent_pos + f_vec * (self.agent_view_size-1) - r_vec * (self.agent_view_size // 2)

        # For each cell in the visibility mask
        for vis_j in range(0, self.agent_view_size):
            for vis_i in range(0, self.agent_view_size):
                # If this cell is not visible, don't highlight it
                if not vis_mask[vis_i, vis_j]:
                    continue

                # Compute the world coordinates of this cell
                abs_i, abs_j = top_left - (f_vec * vis_j) + (r_vec * vis_i)

                # Highlight the cell
                r.fillRect(
                    abs_i * tile_size,
                    abs_j * tile_size,
                    tile_size,
                    tile_size,
                    255, 255, 255, 75
                )

    r.endFrame()

    if mode == 'rgb_array':
        return r.getArray()
    elif mode == 'pixmap':
        return r.getPixmap()
    return r