├── figures
    ├── Unlock.png
    ├── empty-env.png
    ├── fetch-env.png
    ├── DistShift1.png
    ├── DistShift2.png
    ├── multi-room.gif
    ├── UnlockPickup.png
    ├── door-key-env.png
    ├── four-rooms-env.png
    ├── gotodoor-6x6.mp4
    ├── gotodoor-6x6.png
    ├── KeyCorridorS3R1.png
    ├── KeyCorridorS3R2.png
    ├── KeyCorridorS3R3.png
    ├── KeyCorridorS4R3.png
    ├── KeyCorridorS5R3.png
    ├── KeyCorridorS6R3.png
    ├── LavaCrossingS9N1.png
    ├── LavaCrossingS9N2.png
    ├── LavaCrossingS9N3.png
    ├── BlockedUnlockPickup.png
    ├── LavaCrossingS11N5.png
    ├── ObstructedMaze-1Dl.png
    ├── ObstructedMaze-1Dlh.png
    ├── ObstructedMaze-1Q.png
    ├── ObstructedMaze-2Dl.png
    ├── ObstructedMaze-2Dlh.png
    ├── ObstructedMaze-2Q.png
    ├── ObstructedMaze-4Q.png
    ├── SimpleCrossingS11N5.png
    ├── SimpleCrossingS9N1.png
    ├── SimpleCrossingS9N2.png
    ├── SimpleCrossingS9N3.png
    ├── door-key-curriculum.gif
    ├── dynamic_obstacles.gif
    ├── ObstructedMaze-1Dlhb.png
    └── ObstructedMaze-2Dlhb.png
├── .gitignore
├── .travis.yml
├── gym_minigrid
    ├── __init__.py
    ├── register.py
    ├── envs
    │   ├── __init__.py
    │   ├── unlock.py
    │   ├── unlockpickup.py
    │   ├── blockedunlockpickup.py
    │   ├── distshift.py
    │   ├── doorkey.py
    │   ├── empty.py
    │   ├── playground_v0.py
    │   ├── redbluedoors.py
    │   ├── fourrooms.py
    │   ├── gotoobject.py
    │   ├── fetch.py
    │   ├── gotodoor.py
    │   ├── keycorridor.py
    │   ├── lockedroom.py
    │   ├── putnear.py
    │   ├── dynamicobstacles.py
    │   ├── memory.py
    │   ├── crossing.py
    │   ├── obstructedmaze.py
    │   └── multiroom.py
    ├── rendering.py
    ├── wrappers.py
    ├── roomgrid.py
    └── minigrid.py
├── setup.py
├── LICENSE
├── manual_control.py
├── run_tests.py
└── README.md


/figures/Unlock.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/Unlock.png


--------------------------------------------------------------------------------
/figures/empty-env.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/empty-env.png


--------------------------------------------------------------------------------
/figures/fetch-env.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/fetch-env.png


--------------------------------------------------------------------------------
/figures/DistShift1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/DistShift1.png


--------------------------------------------------------------------------------
/figures/DistShift2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/DistShift2.png


--------------------------------------------------------------------------------
/figures/multi-room.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/multi-room.gif


--------------------------------------------------------------------------------
/figures/UnlockPickup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/UnlockPickup.png


--------------------------------------------------------------------------------
/figures/door-key-env.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/door-key-env.png


--------------------------------------------------------------------------------
/figures/four-rooms-env.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/four-rooms-env.png


--------------------------------------------------------------------------------
/figures/gotodoor-6x6.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/gotodoor-6x6.mp4


--------------------------------------------------------------------------------
/figures/gotodoor-6x6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/gotodoor-6x6.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *__pycache__
3 | *egg-info
4 | trained_models
5 | 
6 | # PyPI
7 | build/*
8 | dist/*
9 | 


--------------------------------------------------------------------------------
/figures/KeyCorridorS3R1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/KeyCorridorS3R1.png


--------------------------------------------------------------------------------
/figures/KeyCorridorS3R2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/KeyCorridorS3R2.png


--------------------------------------------------------------------------------
/figures/KeyCorridorS3R3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/KeyCorridorS3R3.png


--------------------------------------------------------------------------------
/figures/KeyCorridorS4R3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/KeyCorridorS4R3.png


--------------------------------------------------------------------------------
/figures/KeyCorridorS5R3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/KeyCorridorS5R3.png


--------------------------------------------------------------------------------
/figures/KeyCorridorS6R3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/KeyCorridorS6R3.png


--------------------------------------------------------------------------------
/figures/LavaCrossingS9N1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/LavaCrossingS9N1.png


--------------------------------------------------------------------------------
/figures/LavaCrossingS9N2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/LavaCrossingS9N2.png


--------------------------------------------------------------------------------
/figures/LavaCrossingS9N3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/LavaCrossingS9N3.png


--------------------------------------------------------------------------------
/figures/BlockedUnlockPickup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/BlockedUnlockPickup.png


--------------------------------------------------------------------------------
/figures/LavaCrossingS11N5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/LavaCrossingS11N5.png


--------------------------------------------------------------------------------
/figures/ObstructedMaze-1Dl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-1Dl.png


--------------------------------------------------------------------------------
/figures/ObstructedMaze-1Dlh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-1Dlh.png


--------------------------------------------------------------------------------
/figures/ObstructedMaze-1Q.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-1Q.png


--------------------------------------------------------------------------------
/figures/ObstructedMaze-2Dl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-2Dl.png


--------------------------------------------------------------------------------
/figures/ObstructedMaze-2Dlh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-2Dlh.png


--------------------------------------------------------------------------------
/figures/ObstructedMaze-2Q.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-2Q.png


--------------------------------------------------------------------------------
/figures/ObstructedMaze-4Q.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-4Q.png


--------------------------------------------------------------------------------
/figures/SimpleCrossingS11N5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/SimpleCrossingS11N5.png


--------------------------------------------------------------------------------
/figures/SimpleCrossingS9N1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/SimpleCrossingS9N1.png


--------------------------------------------------------------------------------
/figures/SimpleCrossingS9N2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/SimpleCrossingS9N2.png


--------------------------------------------------------------------------------
/figures/SimpleCrossingS9N3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/SimpleCrossingS9N3.png


--------------------------------------------------------------------------------
/figures/door-key-curriculum.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/door-key-curriculum.gif


--------------------------------------------------------------------------------
/figures/dynamic_obstacles.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/dynamic_obstacles.gif


--------------------------------------------------------------------------------
/figures/ObstructedMaze-1Dlhb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-1Dlhb.png


--------------------------------------------------------------------------------
/figures/ObstructedMaze-2Dlhb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mit-acl/gym-minigrid/HEAD/figures/ObstructedMaze-2Dlhb.png


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - "3.5"
 4 | 
 5 | # command to install dependencies
 6 | install:
 7 |   - pip3 install -e .
 8 | 
 9 | # command to run tests
10 | script: ./run_tests.py
11 | 


--------------------------------------------------------------------------------
/gym_minigrid/__init__.py:
--------------------------------------------------------------------------------
1 | # Import the envs module so that envs register themselves
2 | import gym_minigrid.envs
3 | 
4 | # Import wrappers so they are accessible when installing with pip
5 | import gym_minigrid.wrappers
6 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | setup(
 4 |     name='gym_minigrid',
 5 |     version='0.0.5',
 6 |     keywords='memory, environment, agent, rl, openaigym, openai-gym, gym',
 7 |     url='https://github.com/maximecb/gym-minigrid',
 8 |     description='Minimalistic gridworld package for OpenAI Gym',
 9 |     packages=['gym_minigrid', 'gym_minigrid.envs'],
10 |     install_requires=[
11 |         'gym>=0.9.6',
12 |         'numpy>=1.15.0',
13 |         'pyqt5>=5.10.1'
14 |     ]
15 | )
16 | 


--------------------------------------------------------------------------------
/gym_minigrid/register.py:
--------------------------------------------------------------------------------
 1 | from gym.envs.registration import register as gym_register
 2 | 
 3 | env_list = []
 4 | 
 5 | def register(
 6 |     id,
 7 |     entry_point,
 8 |     reward_threshold=0.95
 9 | ):
10 |     assert id.startswith("MiniGrid-")
11 |     assert id not in env_list
12 | 
13 |     # Register the environment with OpenAI gym
14 |     gym_register(
15 |         id=id,
16 |         entry_point=entry_point,
17 |         reward_threshold=reward_threshold
18 |     )
19 | 
20 |     # Add the environment to the set
21 |     env_list.append(id)
22 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/__init__.py:
--------------------------------------------------------------------------------
 1 | from gym_minigrid.envs.empty import *
 2 | from gym_minigrid.envs.doorkey import *
 3 | from gym_minigrid.envs.multiroom import *
 4 | from gym_minigrid.envs.fetch import *
 5 | from gym_minigrid.envs.gotoobject import *
 6 | from gym_minigrid.envs.gotodoor import *
 7 | from gym_minigrid.envs.putnear import *
 8 | from gym_minigrid.envs.lockedroom import *
 9 | from gym_minigrid.envs.keycorridor import *
10 | from gym_minigrid.envs.unlock import *
11 | from gym_minigrid.envs.unlockpickup import *
12 | from gym_minigrid.envs.blockedunlockpickup import *
13 | from gym_minigrid.envs.playground_v0 import *
14 | from gym_minigrid.envs.redbluedoors import *
15 | from gym_minigrid.envs.obstructedmaze import *
16 | from gym_minigrid.envs.memory import *
17 | from gym_minigrid.envs.fourrooms import *
18 | from gym_minigrid.envs.crossing import *
19 | from gym_minigrid.envs.dynamicobstacles import *
20 | from gym_minigrid.envs.distshift import *
21 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/unlock.py:
--------------------------------------------------------------------------------
 1 | from gym_minigrid.minigrid import Ball
 2 | from gym_minigrid.roomgrid import RoomGrid
 3 | from gym_minigrid.register import register
 4 | 
 5 | class Unlock(RoomGrid):
 6 |     """
 7 |     Unlock a door
 8 |     """
 9 | 
10 |     def __init__(self, seed=None):
11 |         room_size = 6
12 |         super().__init__(
13 |             num_rows=1,
14 |             num_cols=2,
15 |             room_size=room_size,
16 |             max_steps=8*room_size**2,
17 |             seed=seed
18 |         )
19 | 
20 |     def _gen_grid(self, width, height):
21 |         super()._gen_grid(width, height)
22 | 
23 |         # Make sure the two rooms are directly connected by a locked door
24 |         door, _ = self.add_door(0, 0, 0, locked=True)
25 |         # Add a key to unlock the door
26 |         self.add_object(0, 0, 'key', door.color)
27 | 
28 |         self.place_agent(0, 0)
29 | 
30 |         self.door = door
31 |         self.mission = "open the door"
32 | 
33 |     def step(self, action):
34 |         obs, reward, done, info = super().step(action)
35 | 
36 |         if action == self.actions.toggle:
37 |             if self.door.is_open:
38 |                 reward = self._reward()
39 |                 done = True
40 | 
41 |         return obs, reward, done, info
42 | 
43 | register(
44 |     id='MiniGrid-Unlock-v0',
45 |     entry_point='gym_minigrid.envs:Unlock'
46 | )
47 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/unlockpickup.py:
--------------------------------------------------------------------------------
 1 | from gym_minigrid.minigrid import Ball
 2 | from gym_minigrid.roomgrid import RoomGrid
 3 | from gym_minigrid.register import register
 4 | 
 5 | class UnlockPickup(RoomGrid):
 6 |     """
 7 |     Unlock a door, then pick up a box in another room
 8 |     """
 9 | 
10 |     def __init__(self, seed=None):
11 |         room_size = 6
12 |         super().__init__(
13 |             num_rows=1,
14 |             num_cols=2,
15 |             room_size=room_size,
16 |             max_steps=8*room_size**2,
17 |             seed=seed
18 |         )
19 | 
20 |     def _gen_grid(self, width, height):
21 |         super()._gen_grid(width, height)
22 | 
23 |         # Add a box to the room on the right
24 |         obj, _ = self.add_object(1, 0, kind="box")
25 |         # Make sure the two rooms are directly connected by a locked door
26 |         door, _ = self.add_door(0, 0, 0, locked=True)
27 |         # Add a key to unlock the door
28 |         self.add_object(0, 0, 'key', door.color)
29 | 
30 |         self.place_agent(0, 0)
31 | 
32 |         self.obj = obj
33 |         self.mission = "pick up the %s %s" % (obj.color, obj.type)
34 | 
35 |     def step(self, action):
36 |         obs, reward, done, info = super().step(action)
37 | 
38 |         if action == self.actions.pickup:
39 |             if self.carrying and self.carrying == self.obj:
40 |                 reward = self._reward()
41 |                 done = True
42 | 
43 |         return obs, reward, done, info
44 | 
45 | register(
46 |     id='MiniGrid-UnlockPickup-v0',
47 |     entry_point='gym_minigrid.envs:UnlockPickup'
48 | )
49 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2017, Maxime Chevalier-Boisvert
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/blockedunlockpickup.py:
--------------------------------------------------------------------------------
 1 | from gym_minigrid.minigrid import Ball
 2 | from gym_minigrid.roomgrid import RoomGrid
 3 | from gym_minigrid.register import register
 4 | 
 5 | class BlockedUnlockPickup(RoomGrid):
 6 |     """
 7 |     Unlock a door blocked by a ball, then pick up a box
 8 |     in another room
 9 |     """
10 | 
11 |     def __init__(self, seed=None):
12 |         room_size = 6
13 |         super().__init__(
14 |             num_rows=1,
15 |             num_cols=2,
16 |             room_size=room_size,
17 |             max_steps=16*room_size**2,
18 |             seed=seed
19 |         )
20 | 
21 |     def _gen_grid(self, width, height):
22 |         super()._gen_grid(width, height)
23 | 
24 |         # Add a box to the room on the right
25 |         obj, _ = self.add_object(1, 0, kind="box")
26 |         # Make sure the two rooms are directly connected by a locked door
27 |         door, pos = self.add_door(0, 0, 0, locked=True)
28 |         # Block the door with a ball
29 |         color = self._rand_color()
30 |         self.grid.set(pos[0]-1, pos[1], Ball(color))
31 |         # Add a key to unlock the door
32 |         self.add_object(0, 0, 'key', door.color)
33 | 
34 |         self.place_agent(0, 0)
35 | 
36 |         self.obj = obj
37 |         self.mission = "pick up the %s %s" % (obj.color, obj.type)
38 | 
39 |     def step(self, action):
40 |         obs, reward, done, info = super().step(action)
41 | 
42 |         if action == self.actions.pickup:
43 |             if self.carrying and self.carrying == self.obj:
44 |                 reward = self._reward()
45 |                 done = True
46 | 
47 |         return obs, reward, done, info
48 | 
49 | register(
50 |     id='MiniGrid-BlockedUnlockPickup-v0',
51 |     entry_point='gym_minigrid.envs:BlockedUnlockPickup'
52 | )
53 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/distshift.py:
--------------------------------------------------------------------------------
 1 | from gym_minigrid.minigrid import *
 2 | from gym_minigrid.register import register
 3 | 
 4 | class DistShiftEnv(MiniGridEnv):
 5 |     """
 6 |     Distributional shift environment.
 7 |     """
 8 | 
 9 |     def __init__(
10 |         self,
11 |         width=9,
12 |         height=7,
13 |         agent_start_pos=(1,1),
14 |         agent_start_dir=0,
15 |         strip2_row=2
16 |     ):
17 |         self.agent_start_pos = agent_start_pos
18 |         self.agent_start_dir = agent_start_dir
19 |         self.goal_pos = (width-2, 1)
20 |         self.strip2_row = strip2_row
21 | 
22 |         super().__init__(
23 |             width=width,
24 |             height=height,
25 |             max_steps=4*width*height,
26 |             # Set this to True for maximum speed
27 |             see_through_walls=True
28 |         )
29 | 
30 |     def _gen_grid(self, width, height):
31 |         # Create an empty grid
32 |         self.grid = Grid(width, height)
33 | 
34 |         # Generate the surrounding walls
35 |         self.grid.wall_rect(0, 0, width, height)
36 | 
37 |         # Place a goal square in the bottom-right corner
38 |         self.grid.set(*self.goal_pos, Goal())
39 | 
40 |         # Place the lava rows
41 |         for i in range(self.width - 6):
42 |             self.grid.set(3+i, 1, Lava())
43 |             self.grid.set(3+i, self.strip2_row, Lava())
44 | 
45 |         # Place the agent
46 |         if self.agent_start_pos is not None:
47 |             self.agent_pos = self.agent_start_pos
48 |             self.agent_dir = self.agent_start_dir
49 |         else:
50 |             self.place_agent()
51 | 
52 |         self.mission = "get to the green goal square"
53 | 
class DistShift1(DistShiftEnv):
    """DistShiftEnv with the second lava strip on row 2"""

    def __init__(self):
        super().__init__(strip2_row=2)


class DistShift2(DistShiftEnv):
    """DistShiftEnv with the second lava strip on row 5"""

    def __init__(self):
        super().__init__(strip2_row=5)


# Register both variants, in the same order as before. The loop variables
# are underscore-prefixed so that `import *` does not re-export them.
for _env_id, _entry_point in (
    ('MiniGrid-DistShift1-v0', 'gym_minigrid.envs:DistShift1'),
    ('MiniGrid-DistShift2-v0', 'gym_minigrid.envs:DistShift2'),
):
    register(id=_env_id, entry_point=_entry_point)
71 | 


--------------------------------------------------------------------------------
/manual_control.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | from __future__ import division, print_function
 4 | 
 5 | import sys
 6 | import numpy
 7 | import gym
 8 | import time
 9 | from optparse import OptionParser
10 | 
11 | import gym_minigrid
12 | 
def main():
    """
    Drive a gym environment interactively from the keyboard.

    The environment ID is taken from the -e/--env-name command-line option
    (default 'MiniGrid-MultiRoom-N6-v0'). Key bindings:

        LEFT / RIGHT / UP    left, right, forward
        SPACE                toggle
        PAGE_UP / PAGE_DOWN  pickup, drop
        RETURN               done
        BACKSPACE            reset the environment
        ESCAPE               exit the process

    Any other key is reported as unknown and ignored. The function returns
    once the render window has been closed.
    """
    parser = OptionParser()
    parser.add_option(
        "-e",
        "--env-name",
        dest="env_name",
        help="gym environment to load",
        default='MiniGrid-MultiRoom-N6-v0'
    )
    options, _ = parser.parse_args()

    # Load the gym environment
    env = gym.make(options.env_name)

    # Key name -> name of the matching member of env.actions. The member is
    # looked up only when the key is pressed.
    key_to_action = {
        'LEFT': 'left',
        'RIGHT': 'right',
        'UP': 'forward',
        'SPACE': 'toggle',
        'PAGE_UP': 'pickup',
        'PAGE_DOWN': 'drop',
        'RETURN': 'done',
    }

    def reset_env():
        env.reset()
        if hasattr(env, 'mission'):
            print('Mission: %s' % env.mission)

    reset_env()

    # Create a window to render into
    renderer = env.render('human')

    def on_key_down(key_name):
        if key_name == 'BACKSPACE':
            reset_env()
            return

        if key_name == 'ESCAPE':
            sys.exit(0)

        action_name = key_to_action.get(key_name)
        if action_name is None:
            print("unknown key %s" % key_name)
            return

        action = getattr(env.actions, action_name)
        obs, reward, done, info = env.step(action)

        print('step=%s, reward=%.2f' % (env.step_count, reward))

        if done:
            print('done!')
            reset_env()

    renderer.window.setKeyDownCb(on_key_down)

    while True:
        env.render('human')
        time.sleep(0.01)

        # If the window was closed
        if renderer.window is None:
            break


if __name__ == "__main__":
    main()
88 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/doorkey.py:
--------------------------------------------------------------------------------
 1 | from gym_minigrid.minigrid import *
 2 | from gym_minigrid.register import register
 3 | 
 4 | class DoorKeyEnv(MiniGridEnv):
 5 |     """
 6 |     Environment with a door and key, sparse reward
 7 |     """
 8 | 
 9 |     def __init__(self, size=8):
10 |         super().__init__(
11 |             grid_size=size,
12 |             max_steps=10*size*size
13 |         )
14 | 
15 |     def _gen_grid(self, width, height):
16 |         # Create an empty grid
17 |         self.grid = Grid(width, height)
18 | 
19 |         # Generate the surrounding walls
20 |         self.grid.wall_rect(0, 0, width, height)
21 | 
22 |         # Place a goal in the bottom-right corner
23 |         self.grid.set(width - 2, height - 2, Goal())
24 | 
25 |         # Create a vertical splitting wall
26 |         splitIdx = self._rand_int(2, width-2)
27 |         self.grid.vert_wall(splitIdx, 0)
28 | 
29 |         # Place the agent at a random position and orientation
30 |         # on the left side of the splitting wall
31 |         self.place_agent(size=(splitIdx, height))
32 | 
33 |         # Place a door in the wall
34 |         doorIdx = self._rand_int(1, width-2)
35 |         self.grid.set(splitIdx, doorIdx, Door('yellow', is_locked=True))
36 | 
37 |         # Place a yellow key on the left side
38 |         self.place_obj(
39 |             obj=Key('yellow'),
40 |             top=(0, 0),
41 |             size=(splitIdx, height)
42 |         )
43 | 
44 |         self.mission = "use the key to open the door and then get to the goal"
45 | 
class DoorKeyEnv5x5(DoorKeyEnv):
    """DoorKeyEnv built with size=5"""

    def __init__(self):
        super().__init__(size=5)


class DoorKeyEnv6x6(DoorKeyEnv):
    """DoorKeyEnv built with size=6"""

    def __init__(self):
        super().__init__(size=6)


class DoorKeyEnv16x16(DoorKeyEnv):
    """DoorKeyEnv built with size=16"""

    def __init__(self):
        super().__init__(size=16)


# Register every size variant, in the same order as before; the 8x8 ID
# maps to the base class and its default size. The loop variables are
# underscore-prefixed so that `import *` does not re-export them.
for _env_id, _entry_point in (
    ('MiniGrid-DoorKey-5x5-v0', 'gym_minigrid.envs:DoorKeyEnv5x5'),
    ('MiniGrid-DoorKey-6x6-v0', 'gym_minigrid.envs:DoorKeyEnv6x6'),
    ('MiniGrid-DoorKey-8x8-v0', 'gym_minigrid.envs:DoorKeyEnv'),
    ('MiniGrid-DoorKey-16x16-v0', 'gym_minigrid.envs:DoorKeyEnv16x16'),
):
    register(id=_env_id, entry_point=_entry_point)
77 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/empty.py:
--------------------------------------------------------------------------------
 1 | from gym_minigrid.minigrid import *
 2 | from gym_minigrid.register import register
 3 | 
 4 | class EmptyEnv(MiniGridEnv):
 5 |     """
 6 |     Empty grid environment, no obstacles, sparse reward
 7 |     """
 8 | 
 9 |     def __init__(
10 |         self,
11 |         size=8,
12 |         agent_start_pos=(1,1),
13 |         agent_start_dir=0,
14 |     ):
15 |         self.agent_start_pos = agent_start_pos
16 |         self.agent_start_dir = agent_start_dir
17 | 
18 |         super().__init__(
19 |             grid_size=size,
20 |             max_steps=4*size*size,
21 |             # Set this to True for maximum speed
22 |             see_through_walls=True
23 |         )
24 | 
25 |     def _gen_grid(self, width, height):
26 |         # Create an empty grid
27 |         self.grid = Grid(width, height)
28 | 
29 |         # Generate the surrounding walls
30 |         self.grid.wall_rect(0, 0, width, height)
31 | 
32 |         # Place a goal square in the bottom-right corner
33 |         self.grid.set(width - 2, height - 2, Goal())
34 | 
35 |         # Place the agent
36 |         if self.agent_start_pos is not None:
37 |             self.agent_pos = self.agent_start_pos
38 |             self.agent_dir = self.agent_start_dir
39 |         else:
40 |             self.place_agent()
41 | 
42 |         self.mission = "get to the green goal square"
43 | 
class EmptyEnv5x5(EmptyEnv):
    """EmptyEnv constructed with ``size=5`` and the default agent start."""

    def __init__(self):
        grid_side = 5
        super().__init__(size=grid_side)
47 | 
class EmptyRandomEnv5x5(EmptyEnv):
    """EmptyEnv with ``size=5`` whose agent start is left to ``place_agent()``."""

    def __init__(self):
        # agent_start_pos=None makes EmptyEnv._gen_grid call place_agent()
        options = dict(size=5, agent_start_pos=None)
        super().__init__(**options)
51 | 
class EmptyEnv6x6(EmptyEnv):
    """EmptyEnv constructed with ``size=6`` and the default agent start."""

    def __init__(self):
        grid_side = 6
        super().__init__(size=grid_side)
55 | 
class EmptyRandomEnv6x6(EmptyEnv):
    """EmptyEnv with ``size=6`` whose agent start is left to ``place_agent()``."""

    def __init__(self):
        # agent_start_pos=None makes EmptyEnv._gen_grid call place_agent()
        options = dict(size=6, agent_start_pos=None)
        super().__init__(**options)
59 | 
class EmptyEnv16x16(EmptyEnv):
    """EmptyEnv constructed with ``size=16`` and the default agent start."""

    def __init__(self):
        grid_side = 16
        super().__init__(size=grid_side)
63 | 
# Gym registrations for the Empty family, issued in the original order.
# Underscore-prefixed loop names stay out of ``from ... import *``.
for _env_id, _entry_point in (
    ('MiniGrid-Empty-5x5-v0', 'gym_minigrid.envs:EmptyEnv5x5'),
    ('MiniGrid-Empty-Random-5x5-v0', 'gym_minigrid.envs:EmptyRandomEnv5x5'),
    ('MiniGrid-Empty-6x6-v0', 'gym_minigrid.envs:EmptyEnv6x6'),
    ('MiniGrid-Empty-Random-6x6-v0', 'gym_minigrid.envs:EmptyRandomEnv6x6'),
    ('MiniGrid-Empty-8x8-v0', 'gym_minigrid.envs:EmptyEnv'),
    ('MiniGrid-Empty-16x16-v0', 'gym_minigrid.envs:EmptyEnv16x16'),
):
    register(id=_env_id, entry_point=_entry_point)
93 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/playground_v0.py:
--------------------------------------------------------------------------------
 1 | from gym_minigrid.minigrid import *
 2 | from gym_minigrid.register import register
 3 | 
 4 | class PlaygroundV0(MiniGridEnv):
 5 |     """
 6 |     Environment with multiple rooms and random objects.
 7 |     This environment has no specific goals or rewards.
 8 |     """
 9 | 
10 |     def __init__(self):
11 |         super().__init__(grid_size=19, max_steps=100)
12 | 
13 |     def _gen_grid(self, width, height):
14 |         # Create the grid
15 |         self.grid = Grid(width, height)
16 | 
17 |         # Generate the surrounding walls
18 |         self.grid.horz_wall(0, 0)
19 |         self.grid.horz_wall(0, height-1)
20 |         self.grid.vert_wall(0, 0)
21 |         self.grid.vert_wall(width-1, 0)
22 | 
23 |         roomW = width // 3
24 |         roomH = height // 3
25 | 
26 |         # For each row of rooms
27 |         for j in range(0, 3):
28 | 
29 |             # For each column
30 |             for i in range(0, 3):
31 |                 xL = i * roomW
32 |                 yT = j * roomH
33 |                 xR = xL + roomW
34 |                 yB = yT + roomH
35 | 
36 |                 # Bottom wall and door
37 |                 if i+1 < 3:
38 |                     self.grid.vert_wall(xR, yT, roomH)
39 |                     pos = (xR, self._rand_int(yT+1, yB-1))
40 |                     color = self._rand_elem(COLOR_NAMES)
41 |                     self.grid.set(*pos, Door(color))
42 | 
43 |                 # Bottom wall and door
44 |                 if j+1 < 3:
45 |                     self.grid.horz_wall(xL, yB, roomW)
46 |                     pos = (self._rand_int(xL+1, xR-1), yB)
47 |                     color = self._rand_elem(COLOR_NAMES)
48 |                     self.grid.set(*pos, Door(color))
49 | 
50 |         # Randomize the player start position and orientation
51 |         self.place_agent()
52 | 
53 |         # Place random objects in the world
54 |         types = ['key', 'ball', 'box']
55 |         for i in range(0, 12):
56 |             objType = self._rand_elem(types)
57 |             objColor = self._rand_elem(COLOR_NAMES)
58 |             if objType == 'key':
59 |                 obj = Key(objColor)
60 |             elif objType == 'ball':
61 |                 obj = Ball(objColor)
62 |             elif objType == 'box':
63 |                 obj = Box(objColor)
64 |             self.place_obj(obj)
65 | 
66 |         # No explicit mission in this environment
67 |         self.mission = ''
68 | 
69 |     def step(self, action):
70 |         obs, reward, done, info = MiniGridEnv.step(self, action)
71 |         return obs, reward, done, info
72 | 
# Gym registration for the playground environment.
register(id='MiniGrid-Playground-v0', entry_point='gym_minigrid.envs:PlaygroundV0')
77 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/redbluedoors.py:
--------------------------------------------------------------------------------
 1 | from gym_minigrid.minigrid import *
 2 | from gym_minigrid.register import register
 3 | 
 4 | class RedBlueDoorEnv(MiniGridEnv):
 5 |     """
 6 |     Single room with red and blue doors on opposite sides.
 7 |     The red door must be opened before the blue door to
 8 |     obtain a reward.
 9 |     """
10 | 
11 |     def __init__(self, size=8):
12 |         self.size = size
13 | 
14 |         super().__init__(
15 |             width=2*size,
16 |             height=size,
17 |             max_steps=20*size*size
18 |         )
19 | 
20 |     def _gen_grid(self, width, height):
21 |         # Create an empty grid
22 |         self.grid = Grid(width, height)
23 | 
24 |         # Generate the grid walls
25 |         self.grid.wall_rect(0, 0, 2*self.size, self.size)
26 |         self.grid.wall_rect(self.size//2, 0, self.size, self.size)
27 | 
28 |         # Place the agent in the top-left corner
29 |         self.place_agent(top=(self.size//2, 0), size=(self.size, self.size))
30 | 
31 |         # Add a red door at a random position in the left wall
32 |         pos = self._rand_int(1, self.size - 1)
33 |         self.red_door = Door("red")
34 |         self.grid.set(self.size//2, pos, self.red_door)
35 | 
36 |         # Add a blue door at a random position in the right wall
37 |         pos = self._rand_int(1, self.size - 1)
38 |         self.blue_door = Door("blue")
39 |         self.grid.set(self.size//2 + self.size - 1, pos, self.blue_door)
40 | 
41 |         # Generate the mission string
42 |         self.mission = "open the red door then the blue door"
43 | 
44 |     def step(self, action):
45 |         red_door_opened_before = self.red_door.is_open
46 |         blue_door_opened_before = self.blue_door.is_open
47 | 
48 |         obs, reward, done, info = MiniGridEnv.step(self, action)
49 | 
50 |         red_door_opened_after = self.red_door.is_open
51 |         blue_door_opened_after = self.blue_door.is_open
52 | 
53 |         if blue_door_opened_after:
54 |             if red_door_opened_before:
55 |                 reward = self._reward()
56 |                 done = True
57 |             else:
58 |                 reward = 0
59 |                 done = True
60 | 
61 |         elif red_door_opened_after:
62 |             if blue_door_opened_before:
63 |                 reward = 0
64 |                 done = True
65 | 
66 |         return obs, reward, done, info
67 | 
class RedBlueDoorEnv6x6(RedBlueDoorEnv):
    """RedBlueDoorEnv constructed with ``size=6``."""

    def __init__(self):
        room_side = 6
        super().__init__(size=room_side)
71 | 
# Gym registrations for the RedBlueDoors family, issued in the original order.
# Underscore-prefixed loop names stay out of ``from ... import *``.
for _env_id, _entry_point in (
    ('MiniGrid-RedBlueDoors-6x6-v0', 'gym_minigrid.envs:RedBlueDoorEnv6x6'),
    ('MiniGrid-RedBlueDoors-8x8-v0', 'gym_minigrid.envs:RedBlueDoorEnv'),
):
    register(id=_env_id, entry_point=_entry_point)
81 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/fourrooms.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from gym_minigrid.minigrid import *
 5 | from gym_minigrid.register import register
 6 | 
 7 | 
 8 | class FourRoomsEnv(MiniGridEnv):
 9 |     """
10 |     Classic 4 rooms gridworld environment.
11 |     Can specify agent and goal position, if not it set at random.
12 |     """
13 | 
14 |     def __init__(self, agent_pos=None, goal_pos=None):
15 |         self._agent_default_pos = agent_pos
16 |         self._goal_default_pos = goal_pos
17 |         super().__init__(grid_size=19, max_steps=100)
18 | 
19 |     def _gen_grid(self, width, height):
20 |         # Create the grid
21 |         self.grid = Grid(width, height)
22 | 
23 |         # Generate the surrounding walls
24 |         self.grid.horz_wall(0, 0)
25 |         self.grid.horz_wall(0, height - 1)
26 |         self.grid.vert_wall(0, 0)
27 |         self.grid.vert_wall(width - 1, 0)
28 | 
29 |         room_w = width // 2
30 |         room_h = height // 2
31 | 
32 |         # For each row of rooms
33 |         for j in range(0, 2):
34 | 
35 |             # For each column
36 |             for i in range(0, 2):
37 |                 xL = i * room_w
38 |                 yT = j * room_h
39 |                 xR = xL + room_w
40 |                 yB = yT + room_h
41 | 
42 |                 # Bottom wall and door
43 |                 if i + 1 < 2:
44 |                     self.grid.vert_wall(xR, yT, room_h)
45 |                     pos = (xR, self._rand_int(yT + 1, yB))
46 |                     self.grid.set(*pos, None)
47 | 
48 |                 # Bottom wall and door
49 |                 if j + 1 < 2:
50 |                     self.grid.horz_wall(xL, yB, room_w)
51 |                     pos = (self._rand_int(xL + 1, xR), yB)
52 |                     self.grid.set(*pos, None)
53 | 
54 |         # Randomize the player start position and orientation
55 |         if self._agent_default_pos is not None:
56 |             self.agent_pos = self._agent_default_pos
57 |             self.grid.set(*self._agent_default_pos, None)
58 |             self.agent_dir = self._rand_int(0, 4)  # assuming random start direction
59 |         else:
60 |             self.place_agent()
61 | 
62 |         if self._goal_default_pos is not None:
63 |             goal = Goal()
64 |             self.grid.set(*self._goal_default_pos, goal)
65 |             goal.init_pos, goal.cur_pos = self._goal_default_pos
66 |         else:
67 |             self.place_obj(Goal())
68 | 
69 |         self.mission = 'Reach the goal'
70 | 
71 |     def step(self, action):
72 |         obs, reward, done, info = MiniGridEnv.step(self, action)
73 |         return obs, reward, done, info
74 | 
75 | 
# Gym registration for the four-rooms environment.
register(id='MiniGrid-FourRooms-v0', entry_point='gym_minigrid.envs:FourRoomsEnv')
80 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/gotoobject.py:
--------------------------------------------------------------------------------
 1 | from gym_minigrid.minigrid import *
 2 | from gym_minigrid.register import register
 3 | 
 4 | class GoToObjectEnv(MiniGridEnv):
 5 |     """
 6 |     Environment in which the agent is instructed to go to a given object
 7 |     named using an English text string
 8 |     """
 9 | 
10 |     def __init__(
11 |         self,
12 |         size=6,
13 |         numObjs=2
14 |     ):
15 |         self.numObjs = numObjs
16 | 
17 |         super().__init__(
18 |             grid_size=size,
19 |             max_steps=5*size**2,
20 |             # Set this to True for maximum speed
21 |             see_through_walls=True
22 |         )
23 | 
24 |     def _gen_grid(self, width, height):
25 |         self.grid = Grid(width, height)
26 | 
27 |         # Generate the surrounding walls
28 |         self.grid.wall_rect(0, 0, width, height)
29 | 
30 |         # Types and colors of objects we can generate
31 |         types = ['key', 'ball', 'box']
32 | 
33 |         objs = []
34 |         objPos = []
35 | 
36 |         # Until we have generated all the objects
37 |         while len(objs) < self.numObjs:
38 |             objType = self._rand_elem(types)
39 |             objColor = self._rand_elem(COLOR_NAMES)
40 | 
41 |             # If this object already exists, try again
42 |             if (objType, objColor) in objs:
43 |                 continue
44 | 
45 |             if objType == 'key':
46 |                 obj = Key(objColor)
47 |             elif objType == 'ball':
48 |                 obj = Ball(objColor)
49 |             elif objType == 'box':
50 |                 obj = Box(objColor)
51 | 
52 |             pos = self.place_obj(obj)
53 |             objs.append((objType, objColor))
54 |             objPos.append(pos)
55 | 
56 |         # Randomize the agent start position and orientation
57 |         self.place_agent()
58 | 
59 |         # Choose a random object to be picked up
60 |         objIdx = self._rand_int(0, len(objs))
61 |         self.targetType, self.target_color = objs[objIdx]
62 |         self.target_pos = objPos[objIdx]
63 | 
64 |         descStr = '%s %s' % (self.target_color, self.targetType)
65 |         self.mission = 'go to the %s' % descStr
66 |         #print(self.mission)
67 | 
68 |     def step(self, action):
69 |         obs, reward, done, info = MiniGridEnv.step(self, action)
70 | 
71 |         ax, ay = self.agent_pos
72 |         tx, ty = self.target_pos
73 | 
74 |         # Toggle/pickup action terminates the episode
75 |         if action == self.actions.toggle:
76 |             done = True
77 | 
78 |         # Reward performing the done action next to the target object
79 |         if action == self.actions.done:
80 |             if abs(ax - tx) <= 1 and abs(ay - ty) <= 1:
81 |                 reward = self._reward()
82 |             done = True
83 | 
84 |         return obs, reward, done, info
85 | 
class GotoEnv8x8N2(GoToObjectEnv):
    """GoToObjectEnv constructed with ``size=8`` and ``numObjs=2``."""

    def __init__(self):
        options = dict(size=8, numObjs=2)
        super().__init__(**options)
89 | 
# Gym registrations for the GoToObject family, issued in the original order.
# Underscore-prefixed loop names stay out of ``from ... import *``.
for _env_id, _entry_point in (
    ('MiniGrid-GoToObject-6x6-N2-v0', 'gym_minigrid.envs:GoToObjectEnv'),
    ('MiniGrid-GoToObject-8x8-N2-v0', 'gym_minigrid.envs:GotoEnv8x8N2'),
):
    register(id=_env_id, entry_point=_entry_point)
99 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/fetch.py:
--------------------------------------------------------------------------------
  1 | from gym_minigrid.minigrid import *
  2 | from gym_minigrid.register import register
  3 | 
  4 | class FetchEnv(MiniGridEnv):
  5 |     """
  6 |     Environment in which the agent has to fetch a random object
  7 |     named using English text strings
  8 |     """
  9 | 
 10 |     def __init__(
 11 |         self,
 12 |         size=8,
 13 |         numObjs=3
 14 |     ):
 15 |         self.numObjs = numObjs
 16 | 
 17 |         super().__init__(
 18 |             grid_size=size,
 19 |             max_steps=5*size**2,
 20 |             # Set this to True for maximum speed
 21 |             see_through_walls=True
 22 |         )
 23 | 
 24 |     def _gen_grid(self, width, height):
 25 |         self.grid = Grid(width, height)
 26 | 
 27 |         # Generate the surrounding walls
 28 |         self.grid.horz_wall(0, 0)
 29 |         self.grid.horz_wall(0, height-1)
 30 |         self.grid.vert_wall(0, 0)
 31 |         self.grid.vert_wall(width-1, 0)
 32 | 
 33 |         types = ['key', 'ball']
 34 | 
 35 |         objs = []
 36 | 
 37 |         # For each object to be generated
 38 |         while len(objs) < self.numObjs:
 39 |             objType = self._rand_elem(types)
 40 |             objColor = self._rand_elem(COLOR_NAMES)
 41 | 
 42 |             if objType == 'key':
 43 |                 obj = Key(objColor)
 44 |             elif objType == 'ball':
 45 |                 obj = Ball(objColor)
 46 | 
 47 |             self.place_obj(obj)
 48 |             objs.append(obj)
 49 | 
 50 |         # Randomize the player start position and orientation
 51 |         self.place_agent()
 52 | 
 53 |         # Choose a random object to be picked up
 54 |         target = objs[self._rand_int(0, len(objs))]
 55 |         self.targetType = target.type
 56 |         self.targetColor = target.color
 57 | 
 58 |         descStr = '%s %s' % (self.targetColor, self.targetType)
 59 | 
 60 |         # Generate the mission string
 61 |         idx = self._rand_int(0, 5)
 62 |         if idx == 0:
 63 |             self.mission = 'get a %s' % descStr
 64 |         elif idx == 1:
 65 |             self.mission = 'go get a %s' % descStr
 66 |         elif idx == 2:
 67 |             self.mission = 'fetch a %s' % descStr
 68 |         elif idx == 3:
 69 |             self.mission = 'go fetch a %s' % descStr
 70 |         elif idx == 4:
 71 |             self.mission = 'you must fetch a %s' % descStr
 72 |         assert hasattr(self, 'mission')
 73 | 
 74 |     def step(self, action):
 75 |         obs, reward, done, info = MiniGridEnv.step(self, action)
 76 | 
 77 |         if self.carrying:
 78 |             if self.carrying.color == self.targetColor and \
 79 |                self.carrying.type == self.targetType:
 80 |                 reward = self._reward()
 81 |                 done = True
 82 |             else:
 83 |                 reward = 0
 84 |                 done = True
 85 | 
 86 |         return obs, reward, done, info
 87 | 
 88 | class FetchEnv5x5N2(FetchEnv):
 89 |     def __init__(self):
 90 |         super().__init__(size=5, numObjs=2)
 91 | 
 92 | class FetchEnv6x6N2(FetchEnv):
 93 |     def __init__(self):
 94 |         super().__init__(size=6, numObjs=2)
 95 | 
 96 | register(
 97 |     id='MiniGrid-Fetch-5x5-N2-v0',
 98 |     entry_point='gym_minigrid.envs:FetchEnv5x5N2'
 99 | )
100 | 
101 | register(
102 |     id='MiniGrid-Fetch-6x6-N2-v0',
103 |     entry_point='gym_minigrid.envs:FetchEnv6x6N2'
104 | )
105 | 
106 | register(
107 |     id='MiniGrid-Fetch-8x8-N3-v0',
108 |     entry_point='gym_minigrid.envs:FetchEnv'
109 | )
110 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/gotodoor.py:
--------------------------------------------------------------------------------
  1 | from gym_minigrid.minigrid import *
  2 | from gym_minigrid.register import register
  3 | 
  4 | class GoToDoorEnv(MiniGridEnv):
  5 |     """
  6 |     Environment in which the agent is instructed to go to a given object
  7 |     named using an English text string
  8 |     """
  9 | 
 10 |     def __init__(
 11 |         self,
 12 |         size=5
 13 |     ):
 14 |         assert size >= 5
 15 | 
 16 |         super().__init__(
 17 |             grid_size=size,
 18 |             max_steps=5*size**2,
 19 |             # Set this to True for maximum speed
 20 |             see_through_walls=True
 21 |         )
 22 | 
 23 |     def _gen_grid(self, width, height):
 24 |         # Create the grid
 25 |         self.grid = Grid(width, height)
 26 | 
 27 |         # Randomly vary the room width and height
 28 |         width = self._rand_int(5, width+1)
 29 |         height = self._rand_int(5, height+1)
 30 | 
 31 |         # Generate the surrounding walls
 32 |         self.grid.wall_rect(0, 0, width, height)
 33 | 
 34 |         # Generate the 4 doors at random positions
 35 |         doorPos = []
 36 |         doorPos.append((self._rand_int(2, width-2), 0))
 37 |         doorPos.append((self._rand_int(2, width-2), height-1))
 38 |         doorPos.append((0, self._rand_int(2, height-2)))
 39 |         doorPos.append((width-1, self._rand_int(2, height-2)))
 40 | 
 41 |         # Generate the door colors
 42 |         doorColors = []
 43 |         while len(doorColors) < len(doorPos):
 44 |             color = self._rand_elem(COLOR_NAMES)
 45 |             if color in doorColors:
 46 |                 continue
 47 |             doorColors.append(color)
 48 | 
 49 |         # Place the doors in the grid
 50 |         for idx, pos in enumerate(doorPos):
 51 |             color = doorColors[idx]
 52 |             self.grid.set(*pos, Door(color))
 53 | 
 54 |         # Randomize the agent start position and orientation
 55 |         self.place_agent(size=(width, height))
 56 | 
 57 |         # Select a random target door
 58 |         doorIdx = self._rand_int(0, len(doorPos))
 59 |         self.target_pos = doorPos[doorIdx]
 60 |         self.target_color = doorColors[doorIdx]
 61 | 
 62 |         # Generate the mission string
 63 |         self.mission = 'go to the %s door' % self.target_color
 64 | 
 65 |     def step(self, action):
 66 |         obs, reward, done, info = super().step(action)
 67 | 
 68 |         ax, ay = self.agent_pos
 69 |         tx, ty = self.target_pos
 70 | 
 71 |         # Don't let the agent open any of the doors
 72 |         if action == self.actions.toggle:
 73 |             done = True
 74 | 
 75 |         # Reward performing done action in front of the target door
 76 |         if action == self.actions.done:
 77 |             if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1):
 78 |                 reward = self._reward()
 79 |             done = True
 80 | 
 81 |         return obs, reward, done, info
 82 | 
 83 | class GoToDoor8x8Env(GoToDoorEnv):
 84 |     def __init__(self):
 85 |         super().__init__(size=8)
 86 | 
 87 | class GoToDoor6x6Env(GoToDoorEnv):
 88 |     def __init__(self):
 89 |         super().__init__(size=6)
 90 | 
 91 | register(
 92 |     id='MiniGrid-GoToDoor-5x5-v0',
 93 |     entry_point='gym_minigrid.envs:GoToDoorEnv'
 94 | )
 95 | 
 96 | register(
 97 |     id='MiniGrid-GoToDoor-6x6-v0',
 98 |     entry_point='gym_minigrid.envs:GoToDoor6x6Env'
 99 | )
100 | 
101 | register(
102 |     id='MiniGrid-GoToDoor-8x8-v0',
103 |     entry_point='gym_minigrid.envs:GoToDoor8x8Env'
104 | )
105 | 


--------------------------------------------------------------------------------
/run_tests.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import random
  4 | import numpy as np
  5 | import gym
  6 | from gym_minigrid.register import env_list
  7 | from gym_minigrid.minigrid import Grid, OBJECT_TO_IDX
  8 | 
  9 | # Test specifically importing a specific environment
 10 | from gym_minigrid.envs import DoorKeyEnv
 11 | 
 12 | # Test importing wrappers
 13 | from gym_minigrid.wrappers import *
 14 | 
 15 | ##############################################################################
 16 | 
 17 | print('%d environments registered' % len(env_list))
 18 | 
 19 | for env_name in env_list:
 20 |     print('testing "%s"' % env_name)
 21 | 
 22 |     # Load the gym environment
 23 |     env = gym.make(env_name)
 24 |     env.max_steps = min(env.max_steps, 200)
 25 |     env.reset()
 26 |     env.render('rgb_array')
 27 | 
 28 |     # Verify that the same seed always produces the same environment
 29 |     for i in range(0, 5):
 30 |         seed = 1337 + i
 31 |         env.seed(seed)
 32 |         grid1 = env.grid
 33 |         env.seed(seed)
 34 |         grid2 = env.grid
 35 |         assert grid1 == grid2
 36 | 
 37 |     env.reset()
 38 | 
 39 |     # Run for a few episodes
 40 |     num_episodes = 0
 41 |     while num_episodes < 5:
 42 |         # Pick a random action
 43 |         action = random.randint(0, env.action_space.n - 1)
 44 | 
 45 |         obs, reward, done, info = env.step(action)
 46 | 
 47 |         # Validate the agent position
 48 |         assert env.agent_pos[0] < env.width
 49 |         assert env.agent_pos[1] < env.height
 50 | 
 51 |         # Test observation encode/decode roundtrip
 52 |         img = obs['image']
 53 |         vis_mask = img[:, :, 0] != OBJECT_TO_IDX['unseen']  # hackish
 54 |         img2 = Grid.decode(img).encode(vis_mask=vis_mask)
 55 |         assert np.array_equal(img, img2)
 56 | 
 57 |         # Test the env to string function
 58 |         str(env)
 59 | 
 60 |         # Check that the reward is within the specified range
 61 |         assert reward >= env.reward_range[0], reward
 62 |         assert reward <= env.reward_range[1], reward
 63 | 
 64 |         if done:
 65 |             num_episodes += 1
 66 |             env.reset()
 67 | 
 68 |         env.render('rgb_array')
 69 | 
 70 |     # Test the close method
 71 |     env.close()
 72 | 
 73 |     env = gym.make(env_name)
 74 |     env = ReseedWrapper(env)
 75 |     for _ in range(10):
 76 |         env.reset()
 77 |         env.step(0)
 78 |         env.close()
 79 | 
 80 |     env = gym.make(env_name)
 81 |     env = ImgObsWrapper(env)
 82 |     env.reset()
 83 |     env.step(0)
 84 |     env.close()
 85 | 
 86 |     # Test the fully observable wrapper
 87 |     env = gym.make(env_name)
 88 |     env = FullyObsWrapper(env)
 89 |     env.reset()
 90 |     obs, _, _, _ = env.step(0)
 91 |     assert obs.shape == env.observation_space.shape
 92 |     env.close()
 93 | 
 94 |     env = gym.make(env_name)
 95 |     env = FlatObsWrapper(env)
 96 |     env.reset()
 97 |     env.step(0)
 98 |     env.close()
 99 | 
100 |     env = gym.make(env_name)
101 |     env = AgentViewWrapper(env, 5)
102 |     env.reset()
103 |     env.step(0)
104 |     env.close()
105 | 
106 | ##############################################################################
107 | 
print('testing agent_sees method')
env = gym.make('MiniGrid-DoorKey-6x6-v0')
# The check below tracks the bottom-right interior cell as the goal cell
goal_pos = (env.grid.width - 2, env.grid.height - 2)

# Grid membership accepts (color, type) pairs
assert ('green', 'goal') in env.grid
assert ('blue', 'key') not in env.grid

# env.agent_sees() must agree with what the decoded observation contains
env.reset()
for _ in range(500):
    action = random.randint(0, env.action_space.n - 1)
    obs, reward, done, info = env.step(action)

    goal_visible = ('green', 'goal') in Grid.decode(obs['image'])
    assert env.agent_sees(*goal_pos) == goal_visible

    if done:
        env.reset()
126 | 
127 | #############################################################################
128 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/keycorridor.py:
--------------------------------------------------------------------------------
  1 | from gym_minigrid.roomgrid import RoomGrid
  2 | from gym_minigrid.register import register
  3 | 
  4 | class KeyCorridor(RoomGrid):
  5 |     """
  6 |     A ball is behind a locked door, the key is placed in a
  7 |     random room.
  8 |     """
  9 | 
 10 |     def __init__(
 11 |         self,
 12 |         num_rows=3,
 13 |         obj_type="ball",
 14 |         room_size=6,
 15 |         seed=None
 16 |     ):
 17 |         self.obj_type = obj_type
 18 | 
 19 |         super().__init__(
 20 |             room_size=room_size,
 21 |             num_rows=num_rows,
 22 |             max_steps=30*room_size**2,
 23 |             seed=seed,
 24 |         )
 25 | 
 26 |     def _gen_grid(self, width, height):
 27 |         super()._gen_grid(width, height)
 28 | 
 29 |         # Connect the middle column rooms into a hallway
 30 |         for j in range(1, self.num_rows):
 31 |             self.remove_wall(1, j, 3)
 32 | 
 33 |         # Add a locked door on the bottom right
 34 |         # Add an object behind the locked door
 35 |         room_idx = self._rand_int(0, self.num_rows)
 36 |         door, _ = self.add_door(2, room_idx, 2, locked=True)
 37 |         obj, _ = self.add_object(2, room_idx, kind=self.obj_type)
 38 | 
 39 |         # Add a key in a random room on the left side
 40 |         self.add_object(0, self._rand_int(0, self.num_rows), 'key', door.color)
 41 | 
 42 |         # Place the agent in the middle
 43 |         self.place_agent(1, self.num_rows // 2)
 44 | 
 45 |         # Make sure all rooms are accessible
 46 |         self.connect_all()
 47 | 
 48 |         self.obj = obj
 49 |         self.mission = "pick up the %s %s" % (obj.color, obj.type)
 50 | 
 51 |     def step(self, action):
 52 |         obs, reward, done, info = super().step(action)
 53 | 
 54 |         if action == self.actions.pickup:
 55 |             if self.carrying and self.carrying == self.obj:
 56 |                 reward = self._reward()
 57 |                 done = True
 58 | 
 59 |         return obs, reward, done, info
 60 | 
 61 | class KeyCorridorS3R1(KeyCorridor):
 62 |     def __init__(self, seed=None):
 63 |         super().__init__(
 64 |             room_size=3,
 65 |             num_rows=1,
 66 |             seed=seed
 67 |         )
 68 | 
 69 | class KeyCorridorS3R2(KeyCorridor):
 70 |     def __init__(self, seed=None):
 71 |         super().__init__(
 72 |             room_size=3,
 73 |             num_rows=2,
 74 |             seed=seed
 75 |         )
 76 | 
 77 | class KeyCorridorS3R3(KeyCorridor):
 78 |     def __init__(self, seed=None):
 79 |         super().__init__(
 80 |             room_size=3,
 81 |             num_rows=3,
 82 |             seed=seed
 83 |         )
 84 | 
 85 | class KeyCorridorS4R3(KeyCorridor):
 86 |     def __init__(self, seed=None):
 87 |         super().__init__(
 88 |             room_size=4,
 89 |             num_rows=3,
 90 |             seed=seed
 91 |         )
 92 | 
 93 | class KeyCorridorS5R3(KeyCorridor):
 94 |     def __init__(self, seed=None):
 95 |         super().__init__(
 96 |             room_size=5,
 97 |             num_rows=3,
 98 |             seed=seed
 99 |         )
100 | 
class KeyCorridorS6R3(KeyCorridor):
    """KeyCorridor constructed with ``room_size=6`` and ``num_rows=3``."""

    def __init__(self, seed=None):
        super().__init__(room_size=6, num_rows=3, seed=seed)
108 | 
# Gym registrations for the KeyCorridor family, issued in the original order.
# Underscore-prefixed loop names stay out of ``from ... import *``.
for _env_id, _entry_point in (
    ('MiniGrid-KeyCorridorS3R1-v0', 'gym_minigrid.envs:KeyCorridorS3R1'),
    ('MiniGrid-KeyCorridorS3R2-v0', 'gym_minigrid.envs:KeyCorridorS3R2'),
    ('MiniGrid-KeyCorridorS3R3-v0', 'gym_minigrid.envs:KeyCorridorS3R3'),
    ('MiniGrid-KeyCorridorS4R3-v0', 'gym_minigrid.envs:KeyCorridorS4R3'),
    ('MiniGrid-KeyCorridorS5R3-v0', 'gym_minigrid.envs:KeyCorridorS5R3'),
    ('MiniGrid-KeyCorridorS6R3-v0', 'gym_minigrid.envs:KeyCorridorS6R3'),
):
    register(id=_env_id, entry_point=_entry_point)
138 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/lockedroom.py:
--------------------------------------------------------------------------------
  1 | from gym import spaces
  2 | from gym_minigrid.minigrid import *
  3 | from gym_minigrid.register import register
  4 | 
  5 | class Room:
  6 |     def __init__(self,
  7 |         top,
  8 |         size,
  9 |         doorPos
 10 |     ):
 11 |         self.top = top
 12 |         self.size = size
 13 |         self.doorPos = doorPos
 14 |         self.color = None
 15 |         self.locked = False
 16 | 
 17 |     def rand_pos(self, env):
 18 |         topX, topY = self.top
 19 |         sizeX, sizeY = self.size
 20 |         return env._rand_pos(
 21 |             topX + 1, topX + sizeX - 1,
 22 |             topY + 1, topY + sizeY - 1
 23 |         )
 24 | 
class LockedRoom(MiniGridEnv):
    """
    Environment with a central hallway flanked by six rooms (three on
    each side). One randomly chosen room is locked and contains the goal;
    the key matching its door is placed in one of the other rooms. The
    mission string names the key color and the door color of the room
    holding the key.
    """

    def __init__(
        self,
        size=19
    ):
        super().__init__(grid_size=size, max_steps=10*size)

    def _gen_grid(self, width, height):
        """
        Build the walls, rooms, doors, goal and key, place the agent in
        the hallway and set the mission string.
        """
        # Create the grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        for i in range(0, width):
            self.grid.set(i, 0, Wall())
            self.grid.set(i, height-1, Wall())
        for j in range(0, height):
            self.grid.set(0, j, Wall())
            self.grid.set(width-1, j, Wall())

        # Hallway walls: two full-height columns, two cells either side
        # of the horizontal center
        lWallIdx = width // 2 - 2
        rWallIdx = width // 2 + 2
        for j in range(0, height):
            self.grid.set(lWallIdx, j, Wall())
            self.grid.set(rWallIdx, j, Wall())

        self.rooms = []

        # Room splitting walls
        for n in range(0, 3):
            # Row of the wall that forms the top edge of this pair of rooms
            j = n * (height // 3)
            for i in range(0, lWallIdx):
                self.grid.set(i, j, Wall())
            for i in range(rWallIdx, width):
                self.grid.set(i, j, Wall())

            # One room to the left and one to the right of the hallway;
            # each door sits on the hallway wall, three rows below the
            # room's top edge
            roomW = lWallIdx + 1
            roomH = height // 3 + 1
            self.rooms.append(Room(
                (0, j),
                (roomW, roomH),
                (lWallIdx, j + 3)
            ))
            self.rooms.append(Room(
                (rWallIdx, j),
                (roomW, roomH),
                (rWallIdx, j + 3)
            ))

        # Choose one random room to be locked; the goal goes inside it
        lockedRoom = self._rand_elem(self.rooms)
        lockedRoom.locked = True
        goalPos = lockedRoom.rand_pos(self)
        self.grid.set(*goalPos, Goal())

        # Assign the door colors (each color is used at most once; the
        # candidates are sorted so the draw does not depend on set order)
        colors = set(COLOR_NAMES)
        for room in self.rooms:
            color = self._rand_elem(sorted(colors))
            colors.remove(color)
            room.color = color
            if room.locked:
                self.grid.set(*room.doorPos, Door(color, is_locked=True))
            else:
                self.grid.set(*room.doorPos, Door(color))

        # Select a random room to contain the key (redraw until it is
        # not the locked room)
        while True:
            keyRoom = self._rand_elem(self.rooms)
            if keyRoom != lockedRoom:
                break
        keyPos = keyRoom.rand_pos(self)
        self.grid.set(*keyPos, Key(lockedRoom.color))

        # Randomize the player start position and orientation, restricted
        # to the region starting at the left hallway wall and spanning the
        # distance between the two hallway walls
        self.agent_pos = self.place_agent(
            top=(lWallIdx, 0),
            size=(rWallIdx-lWallIdx, height)
        )

        # Generate the mission string
        self.mission = (
            'get the %s key from the %s room, '
            'unlock the %s door and '
            'go to the goal'
        ) % (lockedRoom.color, keyRoom.color, lockedRoom.color)

    def step(self, action):
        # Pass-through to the base implementation; nothing is altered
        obs, reward, done, info = MiniGridEnv.step(self, action)
        return obs, reward, done, info
120 | 
# Register the LockedRoom entry point under its MiniGrid id.
register(
    id='MiniGrid-LockedRoom-v0',
    entry_point='gym_minigrid.envs:LockedRoom'
)
125 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/putnear.py:
--------------------------------------------------------------------------------
  1 | from gym_minigrid.minigrid import *
  2 | from gym_minigrid.register import register
  3 | 
  4 | class PutNearEnv(MiniGridEnv):
  5 |     """
  6 |     Environment in which the agent is instructed to place an object near
  7 |     another object through a natural language string.
  8 |     """
  9 | 
 10 |     def __init__(
 11 |         self,
 12 |         size=6,
 13 |         numObjs=2
 14 |     ):
 15 |         self.numObjs = numObjs
 16 | 
 17 |         super().__init__(
 18 |             grid_size=size,
 19 |             max_steps=5*size,
 20 |             # Set this to True for maximum speed
 21 |             see_through_walls=True
 22 |         )
 23 | 
 24 |     def _gen_grid(self, width, height):
 25 |         self.grid = Grid(width, height)
 26 | 
 27 |         # Generate the surrounding walls
 28 |         self.grid.horz_wall(0, 0)
 29 |         self.grid.horz_wall(0, height-1)
 30 |         self.grid.vert_wall(0, 0)
 31 |         self.grid.vert_wall(width-1, 0)
 32 | 
 33 |         # Types and colors of objects we can generate
 34 |         types = ['key', 'ball', 'box']
 35 | 
 36 |         objs = []
 37 |         objPos = []
 38 | 
 39 |         def near_obj(env, p1):
 40 |             for p2 in objPos:
 41 |                 dx = p1[0] - p2[0]
 42 |                 dy = p1[1] - p2[1]
 43 |                 if abs(dx) <= 1 and abs(dy) <= 1:
 44 |                     return True
 45 |             return False
 46 | 
 47 |         # Until we have generated all the objects
 48 |         while len(objs) < self.numObjs:
 49 |             objType = self._rand_elem(types)
 50 |             objColor = self._rand_elem(COLOR_NAMES)
 51 | 
 52 |             # If this object already exists, try again
 53 |             if (objType, objColor) in objs:
 54 |                 continue
 55 | 
 56 |             if objType == 'key':
 57 |                 obj = Key(objColor)
 58 |             elif objType == 'ball':
 59 |                 obj = Ball(objColor)
 60 |             elif objType == 'box':
 61 |                 obj = Box(objColor)
 62 | 
 63 |             pos = self.place_obj(obj, reject_fn=near_obj)
 64 | 
 65 |             objs.append((objType, objColor))
 66 |             objPos.append(pos)
 67 | 
 68 |         # Randomize the agent start position and orientation
 69 |         self.place_agent()
 70 | 
 71 |         # Choose a random object to be moved
 72 |         objIdx = self._rand_int(0, len(objs))
 73 |         self.move_type, self.moveColor = objs[objIdx]
 74 |         self.move_pos = objPos[objIdx]
 75 | 
 76 |         # Choose a target object (to put the first object next to)
 77 |         while True:
 78 |             targetIdx = self._rand_int(0, len(objs))
 79 |             if targetIdx != objIdx:
 80 |                 break
 81 |         self.target_type, self.target_color = objs[targetIdx]
 82 |         self.target_pos = objPos[targetIdx]
 83 | 
 84 |         self.mission = 'put the %s %s near the %s %s' % (
 85 |             self.moveColor,
 86 |             self.move_type,
 87 |             self.target_color,
 88 |             self.target_type
 89 |         )
 90 | 
 91 |     def step(self, action):
 92 |         preCarrying = self.carrying
 93 | 
 94 |         obs, reward, done, info = super().step(action)
 95 | 
 96 |         u, v = self.dir_vec
 97 |         ox, oy = (self.agent_pos[0] + u, self.agent_pos[1] + v)
 98 |         tx, ty = self.target_pos
 99 | 
100 |         # If we picked up the wrong object, terminate the episode
101 |         if action == self.actions.pickup and self.carrying:
102 |             if self.carrying.type != self.move_type or self.carrying.color != self.moveColor:
103 |                 done = True
104 | 
105 |         # If successfully dropping an object near the target
106 |         if action == self.actions.drop and preCarrying:
107 |             if self.grid.get(ox, oy) is preCarrying:
108 |                 if abs(ox - tx) <= 1 and abs(oy - ty) <= 1:
109 |                     reward = self._reward()
110 |             done = True
111 | 
112 |         return obs, reward, done, info
113 | 
class PutNear8x8N3(PutNearEnv):
    """PutNearEnv preset: size 8 with 3 objects."""

    def __init__(self):
        super().__init__(size=8, numObjs=3)
117 | 
# The 6x6 / 2-object id points at PutNearEnv itself (its defaults).
register(
    id='MiniGrid-PutNear-6x6-N2-v0',
    entry_point='gym_minigrid.envs:PutNearEnv'
)

# The 8x8 / 3-object id points at the PutNear8x8N3 preset.
register(
    id='MiniGrid-PutNear-8x8-N3-v0',
    entry_point='gym_minigrid.envs:PutNear8x8N3'
)
127 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/dynamicobstacles.py:
--------------------------------------------------------------------------------
  1 | from gym_minigrid.minigrid import *
  2 | from gym_minigrid.register import register
  3 | from operator import add
  4 | 
  5 | class DynamicObstaclesEnv(MiniGridEnv):
  6 |     """
  7 |     Single-room square grid environment with moving obstacles
  8 |     """
  9 | 
 10 |     def __init__(
 11 |             self,
 12 |             size=8,
 13 |             agent_start_pos=(1, 1),
 14 |             agent_start_dir=0,
 15 |             n_obstacles=4
 16 |     ):
 17 |         self.agent_start_pos = agent_start_pos
 18 |         self.agent_start_dir = agent_start_dir
 19 | 
 20 |         # Reduce obstacles if there are too many
 21 |         if n_obstacles <= size/2 + 1:
 22 |             self.n_obstacles = int(n_obstacles)
 23 |         else:
 24 |             self.n_obstacles = int(size/2)
 25 |         super().__init__(
 26 |             grid_size=size,
 27 |             max_steps=4 * size * size,
 28 |             # Set this to True for maximum speed
 29 |             see_through_walls=True,
 30 |         )
 31 |         # Allow only 3 actions permitted: left, right, forward
 32 |         self.action_space = spaces.Discrete(self.actions.forward + 1)
 33 |         self.reward_range = (-1, 1)
 34 | 
 35 |     def _gen_grid(self, width, height):
 36 |         # Create an empty grid
 37 |         self.grid = Grid(width, height)
 38 | 
 39 |         # Generate the surrounding walls
 40 |         self.grid.wall_rect(0, 0, width, height)
 41 | 
 42 |         # Place a goal square in the bottom-right corner
 43 |         self.grid.set(width - 2, height - 2, Goal())
 44 | 
 45 |         # Place the agent
 46 |         if self.agent_start_pos is not None:
 47 |             self.agent_pos = self.agent_start_pos
 48 |             self.agent_dir = self.agent_start_dir
 49 |         else:
 50 |             self.place_agent()
 51 | 
 52 |         # Place obstacles
 53 |         self.obstacles = []
 54 |         for i_obst in range(self.n_obstacles):
 55 |             self.obstacles.append(Ball())
 56 |             self.place_obj(self.obstacles[i_obst], max_tries=100)
 57 | 
 58 |         self.mission = "get to the green goal square"
 59 | 
 60 |     def step(self, action):
 61 |         # Invalid action
 62 |         if action >= self.action_space.n:
 63 |             action = 0
 64 | 
 65 |         # Check if there is an obstacle in front of the agent
 66 |         front_cell = self.grid.get(*self.front_pos)
 67 |         not_clear = front_cell and front_cell.type != 'goal'
 68 | 
 69 |         obs, reward, done, info = MiniGridEnv.step(self, action)
 70 | 
 71 |         # If the agent tries to walk over an obstacle
 72 |         if action == self.actions.forward and not_clear:
 73 |             reward = -1
 74 |             done = True
 75 |             return obs, reward, done, info
 76 | 
 77 |         # Update obstacle positions
 78 |         for i_obst in range(len(self.obstacles)):
 79 |             old_pos = self.obstacles[i_obst].cur_pos
 80 |             top = tuple(map(add, old_pos, (-1, -1)))
 81 | 
 82 |             try:
 83 |                 self.place_obj(self.obstacles[i_obst], top=top, size=(3,3), max_tries=100)
 84 |                 self.grid.set(*old_pos, None)
 85 |             except:
 86 |                 pass
 87 | 
 88 |         return obs, reward, done, info
 89 | 
class DynamicObstaclesEnv5x5(DynamicObstaclesEnv):
    """DynamicObstaclesEnv preset: size 5, 2 obstacles, default fixed agent start."""

    def __init__(self):
        super().__init__(size=5, n_obstacles=2)
 93 | 
class DynamicObstaclesRandomEnv5x5(DynamicObstaclesEnv):
    """DynamicObstaclesEnv preset: size 5, 2 obstacles, no fixed agent start (agent_start_pos=None)."""

    def __init__(self):
        super().__init__(size=5, agent_start_pos=None, n_obstacles=2)
 97 | 
class DynamicObstaclesEnv6x6(DynamicObstaclesEnv):
    """DynamicObstaclesEnv preset: size 6, 3 obstacles, default fixed agent start."""

    def __init__(self):
        super().__init__(size=6, n_obstacles=3)
101 | 
class DynamicObstaclesRandomEnv6x6(DynamicObstaclesEnv):
    """DynamicObstaclesEnv preset: size 6, 3 obstacles, no fixed agent start (agent_start_pos=None)."""

    def __init__(self):
        super().__init__(size=6, agent_start_pos=None, n_obstacles=3)
105 | 
class DynamicObstaclesEnv16x16(DynamicObstaclesEnv):
    """DynamicObstaclesEnv preset: size 16, 8 obstacles, default fixed agent start."""

    def __init__(self):
        super().__init__(size=16, n_obstacles=8)
109 | 
# Registrations for the dynamic-obstacles family. Each id maps to one of
# the preset classes above; the 8x8 id maps to DynamicObstaclesEnv itself
# (its default size is 8).
register(
    id='MiniGrid-Dynamic-Obstacles-5x5-v0',
    entry_point='gym_minigrid.envs:DynamicObstaclesEnv5x5'
)

register(
    id='MiniGrid-Dynamic-Obstacles-Random-5x5-v0',
    entry_point='gym_minigrid.envs:DynamicObstaclesRandomEnv5x5'
)

register(
    id='MiniGrid-Dynamic-Obstacles-6x6-v0',
    entry_point='gym_minigrid.envs:DynamicObstaclesEnv6x6'
)

register(
    id='MiniGrid-Dynamic-Obstacles-Random-6x6-v0',
    entry_point='gym_minigrid.envs:DynamicObstaclesRandomEnv6x6'
)

register(
    id='MiniGrid-Dynamic-Obstacles-8x8-v0',
    entry_point='gym_minigrid.envs:DynamicObstaclesEnv'
)

register(
    id='MiniGrid-Dynamic-Obstacles-16x16-v0',
    entry_point='gym_minigrid.envs:DynamicObstaclesEnv16x16'
)
139 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/memory.py:
--------------------------------------------------------------------------------
  1 | from gym_minigrid.minigrid import *
  2 | from gym_minigrid.register import register
  3 | 
  4 | class MemoryEnv(MiniGridEnv):
  5 |     """
  6 |     This environment is a memory test. The agent starts in a small room
  7 |     where it sees an object. It then has to go through a narrow hallway
  8 |     which ends in a split. At each end of the split there is an object,
  9 |     one of which is the same as the object in the starting room. The
 10 |     agent has to remember the initial object, and go to the matching
 11 |     object at split.
 12 |     """
 13 | 
 14 |     def __init__(
 15 |         self,
 16 |         seed,
 17 |         size=8,
 18 |         random_length=False,
 19 |     ):
 20 |         self.random_length = random_length
 21 |         super().__init__(
 22 |             seed=seed,
 23 |             grid_size=size,
 24 |             max_steps=5*size**2,
 25 |             # Set this to True for maximum speed
 26 |             see_through_walls=False,
 27 |         )
 28 | 
 29 |     def _gen_grid(self, width, height):
 30 |         self.grid = Grid(width, height)
 31 | 
 32 |         # Generate the surrounding walls
 33 |         self.grid.horz_wall(0, 0)
 34 |         self.grid.horz_wall(0, height-1)
 35 |         self.grid.vert_wall(0, 0)
 36 |         self.grid.vert_wall(width - 1, 0)
 37 | 
 38 |         assert height % 2 == 1
 39 |         upper_room_wall = height // 2 - 2
 40 |         lower_room_wall = height // 2 + 2
 41 |         if self.random_length:
 42 |             hallway_end = self._rand_int(4, width - 2)
 43 |         else:
 44 |             hallway_end = width - 3
 45 | 
 46 |         # Start room
 47 |         for i in range(1, 5):
 48 |             self.grid.set(i, upper_room_wall, Wall())
 49 |             self.grid.set(i, lower_room_wall, Wall())
 50 |         self.grid.set(4, upper_room_wall + 1, Wall())
 51 |         self.grid.set(4, lower_room_wall - 1, Wall())
 52 | 
 53 |         # Horizontal hallway
 54 |         for i in range(5, hallway_end):
 55 |             self.grid.set(i, upper_room_wall + 1, Wall())
 56 |             self.grid.set(i, lower_room_wall - 1, Wall())
 57 | 
 58 |         # Vertical hallway
 59 |         for j in range(0, height):
 60 |             if j != height // 2:
 61 |                 self.grid.set(hallway_end, j, Wall())
 62 |             self.grid.set(hallway_end + 2, j, Wall())
 63 | 
 64 |         # Fix the player's start position and orientation
 65 |         self.agent_pos = (self._rand_int(1, hallway_end + 1), height // 2)
 66 |         self.agent_dir = 0
 67 | 
 68 |         # Place objects
 69 |         start_room_obj = self._rand_elem([Key, Ball])
 70 |         self.grid.set(1, height // 2 - 1, start_room_obj('green'))
 71 | 
 72 |         other_objs = self._rand_elem([[Ball, Key], [Key, Ball]])
 73 |         pos0 = (hallway_end + 1, height // 2 - 2)
 74 |         pos1 = (hallway_end + 1, height // 2 + 2)
 75 |         self.grid.set(*pos0, other_objs[0]('green'))
 76 |         self.grid.set(*pos1, other_objs[1]('green'))
 77 | 
 78 |         # Choose the target objects
 79 |         if start_room_obj == other_objs[0]:
 80 |             self.success_pos = (pos0[0], pos0[1] + 1)
 81 |             self.failure_pos = (pos1[0], pos1[1] - 1)
 82 |         else:
 83 |             self.success_pos = (pos1[0], pos1[1] - 1)
 84 |             self.failure_pos = (pos0[0], pos0[1] + 1)
 85 | 
 86 |         self.mission = 'go to the matching object at the end of the hallway'
 87 | 
 88 |     def step(self, action):
 89 |         if action == MiniGridEnv.Actions.pickup:
 90 |             action = MiniGridEnv.Actions.toggle
 91 |         obs, reward, done, info = MiniGridEnv.step(self, action)
 92 | 
 93 |         if tuple(self.agent_pos) == self.success_pos:
 94 |             reward = self._reward()
 95 |             done = True
 96 |         if tuple(self.agent_pos) == self.failure_pos:
 97 |             reward = 0
 98 |             done = True
 99 | 
100 |         return obs, reward, done, info
101 | 
class MemoryS17Random(MemoryEnv):
    """MemoryEnv preset: size 17 with random_length enabled."""

    def __init__(self, seed=None):
        super().__init__(seed=seed, size=17, random_length=True)
105 | 
# Register the MemoryS17Random entry point under its MiniGrid id.
register(
    id='MiniGrid-MemoryS17Random-v0',
    entry_point='gym_minigrid.envs:MemoryS17Random',
)
110 | 
class MemoryS13Random(MemoryEnv):
    """MemoryEnv preset: size 13 with random_length enabled."""

    def __init__(self, seed=None):
        super().__init__(seed=seed, size=13, random_length=True)
114 | 
# Register the MemoryS13Random entry point under its MiniGrid id.
register(
    id='MiniGrid-MemoryS13Random-v0',
    entry_point='gym_minigrid.envs:MemoryS13Random',
)
119 | 
class MemoryS13(MemoryEnv):
    """MemoryEnv preset: size 13, default (non-random) hallway length."""

    def __init__(self, seed=None):
        super().__init__(seed=seed, size=13)
123 | 
# Register the MemoryS13 entry point under its MiniGrid id.
register(
    id='MiniGrid-MemoryS13-v0',
    entry_point='gym_minigrid.envs:MemoryS13',
)
128 | 
class MemoryS11(MemoryEnv):
    """MemoryEnv preset: size 11, default (non-random) hallway length."""

    def __init__(self, seed=None):
        super().__init__(seed=seed, size=11)
132 | 
# Register the MemoryS11 entry point under its MiniGrid id.
register(
    id='MiniGrid-MemoryS11-v0',
    entry_point='gym_minigrid.envs:MemoryS11',
)
137 | 
class MemoryS9(MemoryEnv):
    """MemoryEnv preset: size 9, default (non-random) hallway length."""

    def __init__(self, seed=None):
        super().__init__(seed=seed, size=9)
141 | 
# Register the MemoryS9 entry point under its MiniGrid id.
register(
    id='MiniGrid-MemoryS9-v0',
    entry_point='gym_minigrid.envs:MemoryS9',
)
146 | 
class MemoryS7(MemoryEnv):
    """MemoryEnv preset: size 7, default (non-random) hallway length."""

    def __init__(self, seed=None):
        super().__init__(seed=seed, size=7)
150 | 
# Register the MemoryS7 entry point under its MiniGrid id.
register(
    id='MiniGrid-MemoryS7-v0',
    entry_point='gym_minigrid.envs:MemoryS7',
)
155 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/crossing.py:
--------------------------------------------------------------------------------
  1 | from gym_minigrid.minigrid import *
  2 | from gym_minigrid.register import register
  3 | 
  4 | import itertools as itt
  5 | 
  6 | 
  7 | class CrossingEnv(MiniGridEnv):
  8 |     """
  9 |     Environment with wall or lava obstacles, sparse reward.
 10 |     """
 11 | 
 12 |     def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None):
 13 |         self.num_crossings = num_crossings
 14 |         self.obstacle_type = obstacle_type
 15 |         super().__init__(
 16 |             grid_size=size,
 17 |             max_steps=4*size*size,
 18 |             # Set this to True for maximum speed
 19 |             see_through_walls=False,
 20 |             seed=None
 21 |         )
 22 | 
 23 |     def _gen_grid(self, width, height):
 24 |         assert width % 2 == 1 and height % 2 == 1  # odd size
 25 | 
 26 |         # Create an empty grid
 27 |         self.grid = Grid(width, height)
 28 | 
 29 |         # Generate the surrounding walls
 30 |         self.grid.wall_rect(0, 0, width, height)
 31 | 
 32 |         # Place the agent in the top-left corner
 33 |         self.agent_pos = (1, 1)
 34 |         self.agent_dir = 0
 35 | 
 36 |         # Place a goal square in the bottom-right corner
 37 |         self.grid.set(width - 2, height - 2, Goal())
 38 | 
 39 |         # Place obstacles (lava or walls)
 40 |         v, h = object(), object()  # singleton `vertical` and `horizontal` objects
 41 | 
 42 |         # Lava rivers or walls specified by direction and position in grid
 43 |         rivers = [(v, i) for i in range(2, height - 2, 2)]
 44 |         rivers += [(h, j) for j in range(2, width - 2, 2)]
 45 |         self.np_random.shuffle(rivers)
 46 |         rivers = rivers[:self.num_crossings]  # sample random rivers
 47 |         rivers_v = sorted([pos for direction, pos in rivers if direction is v])
 48 |         rivers_h = sorted([pos for direction, pos in rivers if direction is h])
 49 |         obstacle_pos = itt.chain(
 50 |             itt.product(range(1, width - 1), rivers_h),
 51 |             itt.product(rivers_v, range(1, height - 1)),
 52 |         )
 53 |         for i, j in obstacle_pos:
 54 |             self.grid.set(i, j, self.obstacle_type())
 55 | 
 56 |         # Sample path to goal
 57 |         path = [h] * len(rivers_v) + [v] * len(rivers_h)
 58 |         self.np_random.shuffle(path)
 59 | 
 60 |         # Create openings
 61 |         limits_v = [0] + rivers_v + [height - 1]
 62 |         limits_h = [0] + rivers_h + [width - 1]
 63 |         room_i, room_j = 0, 0
 64 |         for direction in path:
 65 |             if direction is h:
 66 |                 i = limits_v[room_i + 1]
 67 |                 j = self.np_random.choice(
 68 |                     range(limits_h[room_j] + 1, limits_h[room_j + 1]))
 69 |                 room_i += 1
 70 |             elif direction is v:
 71 |                 i = self.np_random.choice(
 72 |                     range(limits_v[room_i] + 1, limits_v[room_i + 1]))
 73 |                 j = limits_h[room_j + 1]
 74 |                 room_j += 1
 75 |             else:
 76 |                 assert False
 77 |             self.grid.set(i, j, None)
 78 | 
 79 |         self.mission = (
 80 |             "avoid the lava and get to the green goal square"
 81 |             if self.obstacle_type == Lava
 82 |             else "find the opening and get to the green goal square"
 83 |         )
 84 | 
class LavaCrossingEnv(CrossingEnv):
    """CrossingEnv preset: size 9, 1 crossing, default (Lava) obstacles."""

    def __init__(self):
        super().__init__(size=9, num_crossings=1)
 88 | 
class LavaCrossingS9N2Env(CrossingEnv):
    """CrossingEnv preset: size 9, 2 crossings, default (Lava) obstacles."""

    def __init__(self):
        super().__init__(size=9, num_crossings=2)
 92 | 
class LavaCrossingS9N3Env(CrossingEnv):
    """CrossingEnv preset: size 9, 3 crossings, default (Lava) obstacles."""

    def __init__(self):
        super().__init__(size=9, num_crossings=3)
 96 | 
class LavaCrossingS11N5Env(CrossingEnv):
    """CrossingEnv preset: size 11, 5 crossings, default (Lava) obstacles."""

    def __init__(self):
        super().__init__(size=11, num_crossings=5)
100 | 
# Registrations for the lava-crossing presets. The S9N1 id maps to
# LavaCrossingEnv (size 9, 1 crossing).
register(
    id='MiniGrid-LavaCrossingS9N1-v0',
    entry_point='gym_minigrid.envs:LavaCrossingEnv'
)

register(
    id='MiniGrid-LavaCrossingS9N2-v0',
    entry_point='gym_minigrid.envs:LavaCrossingS9N2Env'
)

register(
    id='MiniGrid-LavaCrossingS9N3-v0',
    entry_point='gym_minigrid.envs:LavaCrossingS9N3Env'
)

register(
    id='MiniGrid-LavaCrossingS11N5-v0',
    entry_point='gym_minigrid.envs:LavaCrossingS11N5Env'
)
120 | 
class SimpleCrossingEnv(CrossingEnv):
    """CrossingEnv preset: size 9, 1 crossing, Wall obstacles."""

    def __init__(self):
        super().__init__(size=9, num_crossings=1, obstacle_type=Wall)
124 | 
class SimpleCrossingS9N2Env(CrossingEnv):
    """CrossingEnv preset: size 9, 2 crossings, Wall obstacles."""

    def __init__(self):
        super().__init__(size=9, num_crossings=2, obstacle_type=Wall)
128 | 
class SimpleCrossingS9N3Env(CrossingEnv):
    """CrossingEnv preset: size 9, 3 crossings, Wall obstacles."""

    def __init__(self):
        super().__init__(size=9, num_crossings=3, obstacle_type=Wall)
132 | 
class SimpleCrossingS11N5Env(CrossingEnv):
    """CrossingEnv preset: size 11, 5 crossings, Wall obstacles."""

    def __init__(self):
        super().__init__(size=11, num_crossings=5, obstacle_type=Wall)
136 | 
# Registrations for the wall-crossing presets. The S9N1 id maps to
# SimpleCrossingEnv (size 9, 1 crossing).
register(
    id='MiniGrid-SimpleCrossingS9N1-v0',
    entry_point='gym_minigrid.envs:SimpleCrossingEnv'
)

register(
    id='MiniGrid-SimpleCrossingS9N2-v0',
    entry_point='gym_minigrid.envs:SimpleCrossingS9N2Env'
)

register(
    id='MiniGrid-SimpleCrossingS9N3-v0',
    entry_point='gym_minigrid.envs:SimpleCrossingS9N3Env'
)

register(
    id='MiniGrid-SimpleCrossingS11N5-v0',
    entry_point='gym_minigrid.envs:SimpleCrossingS11N5Env'
)
156 | 


--------------------------------------------------------------------------------
/gym_minigrid/rendering.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from PyQt5.QtCore import Qt
  3 | from PyQt5.QtGui import QImage, QPixmap, QPainter, QColor, QPolygon
  4 | from PyQt5.QtCore import QPoint, QSize, QRect
  5 | from PyQt5.QtWidgets import QApplication, QMainWindow, QWidget, QTextEdit
  6 | from PyQt5.QtWidgets import QHBoxLayout, QVBoxLayout, QLabel, QFrame
  7 | 
  8 | class Window(QMainWindow):
  9 |     """
 10 |     Simple application window to render the environment into
 11 |     """
 12 | 
 13 |     def __init__(self):
 14 |         super().__init__()
 15 | 
 16 |         self.setWindowTitle('MiniGrid Gym Environment')
 17 | 
 18 |         # Image label to display the rendering
 19 |         self.imgLabel = QLabel()
 20 |         self.imgLabel.setFrameStyle(QFrame.Panel | QFrame.Sunken)
 21 | 
 22 |         # Text box for the mission
 23 |         self.missionBox = QTextEdit()
 24 |         self.missionBox.setReadOnly(True)
 25 |         self.missionBox.setMinimumSize(400, 100)
 26 | 
 27 |         # Center the image
 28 |         hbox = QHBoxLayout()
 29 |         hbox.addStretch(1)
 30 |         hbox.addWidget(self.imgLabel)
 31 |         hbox.addStretch(1)
 32 | 
 33 |         # Arrange widgets vertically
 34 |         vbox = QVBoxLayout()
 35 |         vbox.addLayout(hbox)
 36 |         vbox.addWidget(self.missionBox)
 37 | 
 38 |         # Create a main widget for the window
 39 |         mainWidget = QWidget(self)
 40 |         self.setCentralWidget(mainWidget)
 41 |         mainWidget.setLayout(vbox)
 42 | 
 43 |         # Show the application window
 44 |         self.show()
 45 |         self.setFocus()
 46 | 
 47 |         self.closed = False
 48 | 
 49 |         # Callback for keyboard events
 50 |         self.keyDownCb = None
 51 | 
 52 |     def closeEvent(self, event):
 53 |         self.closed = True
 54 | 
 55 |     def setPixmap(self, pixmap):
 56 |         self.imgLabel.setPixmap(pixmap)
 57 | 
 58 |     def setText(self, text):
 59 |         self.missionBox.setPlainText(text)
 60 | 
 61 |     def setKeyDownCb(self, callback):
 62 |         self.keyDownCb = callback
 63 | 
 64 |     def keyPressEvent(self, e):
 65 |         if self.keyDownCb == None:
 66 |             return
 67 | 
 68 |         keyName = None
 69 |         if e.key() == Qt.Key_Left:
 70 |             keyName = 'LEFT'
 71 |         elif e.key() == Qt.Key_Right:
 72 |             keyName = 'RIGHT'
 73 |         elif e.key() == Qt.Key_Up:
 74 |             keyName = 'UP'
 75 |         elif e.key() == Qt.Key_Down:
 76 |             keyName = 'DOWN'
 77 |         elif e.key() == Qt.Key_Space:
 78 |             keyName = 'SPACE'
 79 |         elif e.key() == Qt.Key_Return:
 80 |             keyName = 'RETURN'
 81 |         elif e.key() == Qt.Key_Alt:
 82 |             keyName = 'ALT'
 83 |         elif e.key() == Qt.Key_Control:
 84 |             keyName = 'CTRL'
 85 |         elif e.key() == Qt.Key_PageUp:
 86 |             keyName = 'PAGE_UP'
 87 |         elif e.key() == Qt.Key_PageDown:
 88 |             keyName = 'PAGE_DOWN'
 89 |         elif e.key() == Qt.Key_Backspace:
 90 |             keyName = 'BACKSPACE'
 91 |         elif e.key() == Qt.Key_Escape:
 92 |             keyName = 'ESCAPE'
 93 | 
 94 |         if keyName == None:
 95 |             return
 96 |         self.keyDownCb(keyName)
 97 | 
 98 | class Renderer:
 99 |     def __init__(self, width, height, ownWindow=False):
100 |         self.width = width
101 |         self.height = height
102 | 
103 |         self.img = QImage(width, height, QImage.Format_RGB888)
104 |         self.painter = QPainter()
105 | 
106 |         self.window = None
107 |         if ownWindow:
108 |             self.app = QApplication([])
109 |             self.window = Window()
110 | 
111 |     def close(self):
112 |         """
113 |         Deallocate resources used
114 |         """
115 |         pass
116 | 
117 |     def beginFrame(self):
118 |         self.painter.begin(self.img)
119 |         self.painter.setRenderHint(QPainter.Antialiasing, False)
120 | 
121 |         # Clear the background
122 |         self.painter.setBrush(QColor(0, 0, 0))
123 |         self.painter.drawRect(0, 0, self.width - 1, self.height - 1)
124 | 
125 |     def endFrame(self):
126 |         self.painter.end()
127 | 
128 |         if self.window:
129 |             if self.window.closed:
130 |                 self.window = None
131 |             else:
132 |                 self.window.setPixmap(self.getPixmap())
133 |                 self.app.processEvents()
134 | 
135 |     def getPixmap(self):
136 |         return QPixmap.fromImage(self.img)
137 | 
138 |     def getArray(self):
139 |         """
140 |         Get a numpy array of RGB pixel values.
141 |         The array will have shape (height, width, 3)
142 |         """
143 | 
144 |         numBytes = self.width * self.height * 3
145 |         buf = self.img.bits().asstring(numBytes)
146 |         output = np.frombuffer(buf, dtype='uint8')
147 |         output = output.reshape((self.height, self.width, 3))
148 | 
149 |         return output
150 | 
151 |     def push(self):
152 |         self.painter.save()
153 | 
154 |     def pop(self):
155 |         self.painter.restore()
156 | 
157 |     def rotate(self, degrees):
158 |         self.painter.rotate(degrees)
159 | 
160 |     def translate(self, x, y):
161 |         self.painter.translate(x, y)
162 | 
163 |     def scale(self, x, y):
164 |         self.painter.scale(x, y)
165 | 
166 |     def setLineColor(self, r, g, b, a=255):
167 |         self.painter.setPen(QColor(r, g, b, a))
168 | 
169 |     def setColor(self, r, g, b, a=255):
170 |         self.painter.setBrush(QColor(r, g, b, a))
171 | 
172 |     def setLineWidth(self, width):
173 |         pen = self.painter.pen()
174 |         pen.setWidthF(width)
175 |         self.painter.setPen(pen)
176 | 
177 |     def drawLine(self, x0, y0, x1, y1):
178 |         self.painter.drawLine(x0, y0, x1, y1)
179 | 
180 |     def drawCircle(self, x, y, r):
181 |         center = QPoint(x, y)
182 |         self.painter.drawEllipse(center, r, r)
183 | 
184 |     def drawPolygon(self, points):
185 |         """Takes a list of points (tuples) as input"""
186 |         points = map(lambda p: QPoint(p[0], p[1]), points)
187 |         self.painter.drawPolygon(QPolygon(points))
188 | 
189 |     def drawPolyline(self, points):
190 |         """Takes a list of points (tuples) as input"""
191 |         points = map(lambda p: QPoint(p[0], p[1]), points)
192 |         self.painter.drawPolyline(QPolygon(points))
193 | 
194 |     def fillRect(self, x, y, width, height, r, g, b, a=255):
195 |         self.painter.fillRect(QRect(x, y, width, height), QColor(r, g, b, a))
196 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/obstructedmaze.py:
--------------------------------------------------------------------------------
  1 | from gym_minigrid.minigrid import *
  2 | from gym_minigrid.roomgrid import RoomGrid
  3 | from gym_minigrid.register import register
  4 | 
  5 | class ObstructedMazeEnv(RoomGrid):
  6 |     """
  7 |     A blue ball is hidden in the maze. Doors may be locked,
  8 |     doors may be obstructed by a ball and keys may be hidden in boxes.
  9 |     """
 10 | 
 11 |     def __init__(self,
 12 |         num_rows,
 13 |         num_cols,
 14 |         num_rooms_visited,
 15 |         seed=None
 16 |     ):
 17 |         room_size = 6
 18 |         max_steps = 4*num_rooms_visited*room_size**2
 19 | 
 20 |         super().__init__(
 21 |             room_size=room_size,
 22 |             num_rows=num_rows,
 23 |             num_cols=num_cols,
 24 |             max_steps=max_steps,
 25 |             seed=seed
 26 |         )
 27 | 
 28 |     def _gen_grid(self, width, height):
 29 |         super()._gen_grid(width, height)
 30 | 
 31 |         # Define all possible colors for doors
 32 |         self.door_colors = self._rand_subset(COLOR_NAMES, len(COLOR_NAMES))
 33 |         # Define the color of the ball to pick up
 34 |         self.ball_to_find_color = COLOR_NAMES[0]
 35 |         # Define the color of the balls that obstruct doors
 36 |         self.blocking_ball_color = COLOR_NAMES[1]
 37 |         # Define the color of boxes in which keys are hidden
 38 |         self.box_color = COLOR_NAMES[2]
 39 | 
 40 |         self.mission = "pick up the %s ball" % self.ball_to_find_color
 41 | 
 42 |     def step(self, action):
 43 |         obs, reward, done, info = super().step(action)
 44 | 
 45 |         if action == self.actions.pickup:
 46 |             if self.carrying and self.carrying == self.obj:
 47 |                 reward = self._reward()
 48 |                 done = True
 49 | 
 50 |         return obs, reward, done, info
 51 | 
 52 |     def add_door(self, i, j, door_idx=0, color=None, locked=False, key_in_box=False, blocked=False):
 53 |         """
 54 |         Add a door. If the door must be locked, it also adds the key.
 55 |         If the key must be hidden, it is put in a box. If the door must
 56 |         be obstructed, it adds a ball in front of the door.
 57 |         """
 58 | 
 59 |         door, door_pos = super().add_door(i, j, door_idx, color, locked=locked)
 60 | 
 61 |         if blocked:
 62 |             vec = DIR_TO_VEC[door_idx]
 63 |             blocking_ball = Ball(self.blocking_ball_color) if blocked else None
 64 |             self.grid.set(door_pos[0]-vec[0], door_pos[1]-vec[1], blocking_ball)
 65 |             
 66 |         if locked:
 67 |             obj = Key(door.color)
 68 |             if key_in_box:
 69 |                 box = Box(self.box_color) if key_in_box else None
 70 |                 box.contains = obj
 71 |                 obj = box
 72 |             self.place_in_room(i, j, obj)
 73 | 
 74 |         return door, door_pos
 75 | 
 76 | class ObstructedMaze_1Dlhb(ObstructedMazeEnv):
 77 |     """
 78 |     A blue ball is hidden in a 2x1 maze. A locked door separates
 79 |     rooms. Doors are obstructed by a ball and keys are hidden in boxes.
 80 |     """
 81 | 
 82 |     def __init__(self, key_in_box=True, blocked=True, seed=None):
 83 |         self.key_in_box = key_in_box
 84 |         self.blocked = blocked
 85 | 
 86 |         super().__init__(
 87 |             num_rows=1,
 88 |             num_cols=2,
 89 |             num_rooms_visited=2,
 90 |             seed=seed
 91 |         )
 92 | 
 93 |     def _gen_grid(self, width, height):
 94 |         super()._gen_grid(width, height)
 95 | 
 96 |         self.add_door(0, 0, door_idx=0, color=self.door_colors[0],
 97 |                       locked=True,
 98 |                       key_in_box=self.key_in_box,
 99 |                       blocked=self.blocked)
100 | 
101 |         self.obj, _ = self.add_object(1, 0, "ball", color=self.ball_to_find_color)
102 |         self.place_agent(0, 0)
103 | 
class ObstructedMaze_1Dl(ObstructedMaze_1Dlhb):
    """Variant of ObstructedMaze_1Dlhb with no box around the key and no blocking ball."""

    def __init__(self, seed=None):
        super().__init__(key_in_box=False, blocked=False, seed=seed)
107 | 
class ObstructedMaze_1Dlh(ObstructedMaze_1Dlhb):
    """Variant of ObstructedMaze_1Dlhb with the key hidden in a box but no blocking ball."""

    def __init__(self, seed=None):
        super().__init__(key_in_box=True, blocked=False, seed=seed)
111 | 
class ObstructedMaze_Full(ObstructedMazeEnv):
    """
    The ball to pick up is hidden in one of the corner rooms of a 3x3
    maze. The center room opens onto the side rooms through unlocked
    doors; side rooms open onto their neighbors through locked doors
    which, by default, are obstructed by a ball and whose keys are hidden
    in boxes. `num_quarters` limits how many side rooms and corners are
    used.
    """

    def __init__(self, agent_room=(1, 1), key_in_box=True, blocked=True,
                 num_quarters=4, num_rooms_visited=25, seed=None):
        # Stored before calling the parent constructor; presumably the
        # parent triggers _gen_grid, which reads these attributes
        self.agent_room = agent_room
        self.key_in_box = key_in_box
        self.blocked = blocked
        self.num_quarters = num_quarters

        super().__init__(
            num_rows=3,
            num_cols=3,
            num_rooms_visited=num_rooms_visited,
            seed=seed
        )

    def _gen_grid(self, width, height):
        super()._gen_grid(width, height)

        center = (1, 1)
        # "Side rooms" are the rooms that are neither corners nor the
        # center; their order matches the door indices of the center room
        side_rooms = [(2, 1), (1, 2), (0, 1), (1, 0)][:self.num_quarters]

        for i, side_room in enumerate(side_rooms):
            # Unlocked door between the center room and this side room
            self.add_door(*center, door_idx=i, color=self.door_colors[i], locked=False)

            # Locked door on each of the two lateral sides of the side room
            for k in (-1, 1):
                lateral = i + k
                self.add_door(
                    *side_room,
                    locked=True,
                    door_idx=lateral % 4,
                    color=self.door_colors[lateral % len(self.door_colors)],
                    key_in_box=self.key_in_box,
                    blocked=self.blocked
                )

        # The target ball goes in a randomly chosen corner room
        corners = [(2, 0), (2, 2), (0, 2), (0, 0)][:self.num_quarters]
        ball_room = self._rand_elem(corners)

        target, _ = self.add_object(*ball_room, "ball", color=self.ball_to_find_color)
        self.obj = target
        self.place_agent(*self.agent_room)
159 | 
class ObstructedMaze_2Dl(ObstructedMaze_Full):
    """One quarter of the full maze, agent in room (2, 1); keys not boxed, doors not blocked."""

    def __init__(self, seed=None):
        super().__init__(
            agent_room=(2, 1),
            key_in_box=False,
            blocked=False,
            num_quarters=1,
            num_rooms_visited=4,
            seed=seed
        )
163 | 
class ObstructedMaze_2Dlh(ObstructedMaze_Full):
    """One quarter of the full maze, agent in room (2, 1); keys boxed, doors not blocked."""

    def __init__(self, seed=None):
        super().__init__(
            agent_room=(2, 1),
            key_in_box=True,
            blocked=False,
            num_quarters=1,
            num_rooms_visited=4,
            seed=seed
        )
167 | 
168 | 
class ObstructedMaze_2Dlhb(ObstructedMaze_Full):
    """One quarter of the full maze, agent in room (2, 1); keys boxed, doors blocked."""

    def __init__(self, seed=None):
        super().__init__(
            agent_room=(2, 1),
            key_in_box=True,
            blocked=True,
            num_quarters=1,
            num_rooms_visited=4,
            seed=seed
        )
172 | 
class ObstructedMaze_1Q(ObstructedMaze_Full):
    """One quarter of the full maze, agent in the center room; keys boxed, doors blocked."""

    def __init__(self, seed=None):
        super().__init__(
            agent_room=(1, 1),
            key_in_box=True,
            blocked=True,
            num_quarters=1,
            num_rooms_visited=5,
            seed=seed
        )
176 | 
class ObstructedMaze_2Q(ObstructedMaze_Full):
    """Two quarters of the full maze, agent in the center room; keys boxed, doors blocked."""

    def __init__(self, seed=None):
        super().__init__(
            agent_room=(1, 1),
            key_in_box=True,
            blocked=True,
            num_quarters=2,
            num_rooms_visited=11,
            seed=seed
        )
180 | 
# Register every ObstructedMaze variant, in the same order as before.
# Each entry is (environment id, entry point).
for _env_id, _entry_point in (
    ("MiniGrid-ObstructedMaze-1Dl-v0", "gym_minigrid.envs:ObstructedMaze_1Dl"),
    ("MiniGrid-ObstructedMaze-1Dlh-v0", "gym_minigrid.envs:ObstructedMaze_1Dlh"),
    ("MiniGrid-ObstructedMaze-1Dlhb-v0", "gym_minigrid.envs:ObstructedMaze_1Dlhb"),
    ("MiniGrid-ObstructedMaze-2Dl-v0", "gym_minigrid.envs:ObstructedMaze_2Dl"),
    ("MiniGrid-ObstructedMaze-2Dlh-v0", "gym_minigrid.envs:ObstructedMaze_2Dlh"),
    ("MiniGrid-ObstructedMaze-2Dlhb-v0", "gym_minigrid.envs:ObstructedMaze_2Dlhb"),
    ("MiniGrid-ObstructedMaze-1Q-v0", "gym_minigrid.envs:ObstructedMaze_1Q"),
    ("MiniGrid-ObstructedMaze-2Q-v0", "gym_minigrid.envs:ObstructedMaze_2Q"),
    ("MiniGrid-ObstructedMaze-Full-v0", "gym_minigrid.envs:ObstructedMaze_Full"),
):
    register(id=_env_id, entry_point=_entry_point)


--------------------------------------------------------------------------------
/gym_minigrid/wrappers.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import operator
  3 | from functools import reduce
  4 | 
  5 | import numpy as np
  6 | import gym
  7 | from gym import error, spaces, utils
  8 | from .minigrid import OBJECT_TO_IDX, COLOR_TO_IDX
  9 | from .minigrid import CELL_PIXELS
 10 | 
 11 | class ReseedWrapper(gym.core.Wrapper):
 12 |     """
 13 |     Wrapper to always regenerate an environment with the same set of seeds.
 14 |     This can be used to force an environment to always keep the same
 15 |     configuration when reset.
 16 |     """
 17 | 
 18 |     def __init__(self, env, seeds=[0], seed_idx=0):
 19 |         self.seeds = list(seeds)
 20 |         self.seed_idx = seed_idx
 21 |         super().__init__(env)
 22 | 
 23 |     def reset(self, **kwargs):
 24 |         seed = self.seeds[self.seed_idx]
 25 |         self.seed_idx = (self.seed_idx + 1) % len(self.seeds)
 26 |         self.env.seed(seed)
 27 |         return self.env.reset(**kwargs)
 28 | 
 29 |     def step(self, action):
 30 |         obs, reward, done, info = self.env.step(action)
 31 |         return obs, reward, done, info
 32 | 
 33 | class ActionBonus(gym.core.Wrapper):
 34 |     """
 35 |     Wrapper which adds an exploration bonus.
 36 |     This is a reward to encourage exploration of less
 37 |     visited (state,action) pairs.
 38 |     """
 39 | 
 40 |     def __init__(self, env):
 41 |         super().__init__(env)
 42 |         self.counts = {}
 43 | 
 44 |     def step(self, action):
 45 |         obs, reward, done, info = self.env.step(action)
 46 | 
 47 |         env = self.unwrapped
 48 |         tup = (tuple(env.agent_pos), env.agent_dir, action)
 49 | 
 50 |         # Get the count for this (s,a) pair
 51 |         pre_count = 0
 52 |         if tup in self.counts:
 53 |             pre_count = self.counts[tup]
 54 | 
 55 |         # Update the count for this (s,a) pair
 56 |         new_count = pre_count + 1
 57 |         self.counts[tup] = new_count
 58 | 
 59 |         bonus = 1 / math.sqrt(new_count)
 60 |         reward += bonus
 61 | 
 62 |         return obs, reward, done, info
 63 | 
 64 |     def reset(self, **kwargs):
 65 |         return self.env.reset(**kwargs)
 66 | 
 67 | class StateBonus(gym.core.Wrapper):
 68 |     """
 69 |     Adds an exploration bonus based on which positions
 70 |     are visited on the grid.
 71 |     """
 72 | 
 73 |     def __init__(self, env):
 74 |         super().__init__(env)
 75 |         self.counts = {}
 76 | 
 77 |     def step(self, action):
 78 |         obs, reward, done, info = self.env.step(action)
 79 | 
 80 |         # Tuple based on which we index the counts
 81 |         # We use the position after an update
 82 |         env = self.unwrapped
 83 |         tup = (tuple(env.agent_pos))
 84 | 
 85 |         # Get the count for this key
 86 |         pre_count = 0
 87 |         if tup in self.counts:
 88 |             pre_count = self.counts[tup]
 89 | 
 90 |         # Update the count for this key
 91 |         new_count = pre_count + 1
 92 |         self.counts[tup] = new_count
 93 | 
 94 |         bonus = 1 / math.sqrt(new_count)
 95 |         reward += bonus
 96 | 
 97 |         return obs, reward, done, info
 98 | 
 99 |     def reset(self, **kwargs):
100 |         return self.env.reset(**kwargs)
101 | 
class ImgObsWrapper(gym.core.ObservationWrapper):
    """
    Keep only the 'image' entry of the observation, dropping the
    language/mission part.
    """

    def __init__(self, env):
        super().__init__(env)

        # The observation space becomes the image sub-space of the
        # wrapped environment's dict space
        image_space = env.observation_space.spaces['image']
        self.observation_space = image_space

    def observation(self, obs):
        image = obs['image']
        return image
114 | 
class RGBImgObsWrapper(gym.core.ObservationWrapper):
    """
    Wrapper which replaces the observation with an RGB rendering of the
    whole grid (fully observable, no language/mission), so that the
    agent can solve the gridworld in pixel space.
    """

    def __init__(self, env, tile_size=8):
        super().__init__(env)

        # Size in pixels of one grid cell in the rendering
        self.tile_size = tile_size

        pixel_shape = (self.env.width*tile_size, self.env.height*tile_size, 3)
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=pixel_shape,
            dtype='uint8'
        )

    def observation(self, obs):
        # The incoming observation is not used: the image is rendered
        # from the unwrapped environment
        base_env = self.unwrapped
        rendering = base_env.render(
            mode='rgb_array',
            highlight=False,
            tile_size=self.tile_size
        )
        return rendering
141 | 
class RGBImgPartialObsWrapper(gym.core.ObservationWrapper):
    """
    Wrapper which replaces the 'image' entry of the observation with an
    RGB rendering of the agent's partial view. The 'mission' entry is
    passed through unchanged. This can be used to have the agent solve
    the gridworld in pixel space.
    """

    def __init__(self, env, tile_size=8):
        super().__init__(env)

        # Size in pixels of one grid cell in the rendering
        self.tile_size = tile_size

        obs_shape = env.observation_space.spaces['image'].shape
        image_space = spaces.Box(
            low=0,
            high=255,
            shape=(obs_shape[0] * tile_size, obs_shape[1] * tile_size, 3),
            dtype='uint8'
        )

        # observation() returns a dict, so the space is declared as a
        # Dict with an 'image' entry (as AgentViewWrapper does) instead
        # of a bare Box. This also lets wrappers which read
        # observation_space.spaces['image'] be stacked on top.
        self.observation_space = spaces.Dict({
            'image': image_space
        })

    def observation(self, obs):
        env = self.unwrapped
        return {
            'mission': obs['mission'],
            'image': env.get_obs_render(obs['image'], tile_size=self.tile_size, mode='rgb_array')
        }
167 | 
class FullyObsWrapper(gym.core.ObservationWrapper):
    """
    Wrapper which replaces the observation with the compact encoding of
    the whole grid, with the agent written into its own cell.
    """

    def __init__(self, env):
        super().__init__(env)

        # One 3-value entry per grid cell
        grid_shape = (self.env.width, self.env.height, 3)
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=grid_shape,
            dtype='uint8'
        )

    def observation(self, obs):
        # The incoming observation is not used: the grid is encoded from
        # the unwrapped environment
        base_env = self.unwrapped
        encoded = base_env.grid.encode()

        # Overwrite the agent's cell with (agent, red, direction)
        agent_cell = np.array([
            OBJECT_TO_IDX['agent'],
            COLOR_TO_IDX['red'],
            base_env.agent_dir
        ])
        encoded[base_env.agent_pos[0]][base_env.agent_pos[1]] = agent_cell

        return encoded
193 | 
class FlatObsWrapper(gym.core.ObservationWrapper):
    """
    Encode mission strings using a one-hot scheme,
    and combine these with observed images into one flat array.

    Each character position of the mission gets `numCharCodes` slots:
    one per letter 'a'-'z' plus one for the space character. Any other
    character is encoded as an all-zero row.
    """

    def __init__(self, env, maxStrLen=96):
        super().__init__(env)

        self.maxStrLen = maxStrLen
        self.numCharCodes = 27

        imgSpace = env.observation_space.spaces['image']
        imgSize = reduce(operator.mul, imgSpace.shape, 1)

        # NOTE(review): observation() returns a 1-D array of this length
        # (with the float32 dtype of the mission encoding), while the
        # declared space has a leading axis of size 1 and dtype uint8.
        # Left unchanged to avoid breaking callers; confirm which is intended.
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(1, imgSize + self.numCharCodes * self.maxStrLen),
            dtype='uint8'
        )

        # Last mission string seen (as received) and its encoding
        self.cachedStr = None
        self.cachedArray = None

    def observation(self, obs):
        image = obs['image']
        mission = obs['mission']

        # Cache the last-encoded mission string. The cache is keyed on
        # the string as received, so that a mission containing uppercase
        # characters is not re-encoded on every call.
        if mission != self.cachedStr:
            assert len(mission) <= self.maxStrLen, 'mission string too long ({} chars)'.format(len(mission))

            strArray = np.zeros(shape=(self.maxStrLen, self.numCharCodes), dtype='float32')

            for idx, ch in enumerate(mission.lower()):
                if ch >= 'a' and ch <= 'z':
                    chNo = ord(ch) - ord('a')
                elif ch == ' ':
                    chNo = ord('z') - ord('a') + 1
                else:
                    # Character without a code (digit, punctuation, ...):
                    # leave its row at zero instead of reusing the code
                    # of the previous character
                    continue
                assert chNo < self.numCharCodes, '%s : %d' % (ch, chNo)
                strArray[idx, chNo] = 1

            self.cachedStr = mission
            self.cachedArray = strArray

        obs = np.concatenate((image.flatten(), self.cachedArray.flatten()))

        return obs
244 | 
class AgentViewWrapper(gym.core.Wrapper):
    """
    Wrapper which changes the size of the agent's field of view.
    """

    def __init__(self, env, agent_view_size=7):
        super().__init__(env)

        # Replace the view size of the underlying environment
        env.unwrapped.agent_view_size = agent_view_size

        # Image space matching the new view size
        view_shape = (agent_view_size, agent_view_size, 3)
        image_space = gym.spaces.Box(
            low=0,
            high=255,
            shape=view_shape,
            dtype='uint8'
        )

        # The wrapper's observation space is a dict holding only the image
        self.observation_space = spaces.Dict({'image': image_space})

    def reset(self, **kwargs):
        # Plain pass-through
        return self.env.reset(**kwargs)

    def step(self, action):
        # Plain pass-through
        return self.env.step(action)
274 | 


--------------------------------------------------------------------------------
/gym_minigrid/envs/multiroom.py:
--------------------------------------------------------------------------------
  1 | from gym_minigrid.minigrid import *
  2 | from gym_minigrid.register import register
  3 | 
  4 | class Room:
  5 |     def __init__(self,
  6 |         top,
  7 |         size,
  8 |         entryDoorPos,
  9 |         exitDoorPos
 10 |     ):
 11 |         self.top = top
 12 |         self.size = size
 13 |         self.entryDoorPos = entryDoorPos
 14 |         self.exitDoorPos = exitDoorPos
 15 | 
 16 | class MultiRoomEnv(MiniGridEnv):
 17 |     """
 18 |     Environment with multiple rooms (subgoals)
 19 |     """
 20 | 
 21 |     def __init__(self,
 22 |         minNumRooms,
 23 |         maxNumRooms,
 24 |         maxRoomSize=10
 25 |     ):
 26 |         assert minNumRooms > 0
 27 |         assert maxNumRooms >= minNumRooms
 28 |         assert maxRoomSize >= 4
 29 | 
 30 |         self.minNumRooms = minNumRooms
 31 |         self.maxNumRooms = maxNumRooms
 32 |         self.maxRoomSize = maxRoomSize
 33 | 
 34 |         self.rooms = []
 35 | 
 36 |         super(MultiRoomEnv, self).__init__(
 37 |             grid_size=25,
 38 |             max_steps=self.maxNumRooms * 20
 39 |         )
 40 | 
 41 |     def _gen_grid(self, width, height):
 42 |         roomList = []
 43 | 
 44 |         # Choose a random number of rooms to generate
 45 |         numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms+1)
 46 | 
 47 |         while len(roomList) < numRooms:
 48 |             curRoomList = []
 49 | 
 50 |             entryDoorPos = (
 51 |                 self._rand_int(0, width - 2),
 52 |                 self._rand_int(0, width - 2)
 53 |             )
 54 | 
 55 |             # Recursively place the rooms
 56 |             self._placeRoom(
 57 |                 numRooms,
 58 |                 roomList=curRoomList,
 59 |                 minSz=4,
 60 |                 maxSz=self.maxRoomSize,
 61 |                 entryDoorWall=2,
 62 |                 entryDoorPos=entryDoorPos
 63 |             )
 64 | 
 65 |             if len(curRoomList) > len(roomList):
 66 |                 roomList = curRoomList
 67 | 
 68 |         # Store the list of rooms in this environment
 69 |         assert len(roomList) > 0
 70 |         self.rooms = roomList
 71 | 
 72 |         # Create the grid
 73 |         self.grid = Grid(width, height)
 74 |         wall = Wall()
 75 | 
 76 |         prevDoorColor = None
 77 | 
 78 |         # For each room
 79 |         for idx, room in enumerate(roomList):
 80 | 
 81 |             topX, topY = room.top
 82 |             sizeX, sizeY = room.size
 83 | 
 84 |             # Draw the top and bottom walls
 85 |             for i in range(0, sizeX):
 86 |                 self.grid.set(topX + i, topY, wall)
 87 |                 self.grid.set(topX + i, topY + sizeY - 1, wall)
 88 | 
 89 |             # Draw the left and right walls
 90 |             for j in range(0, sizeY):
 91 |                 self.grid.set(topX, topY + j, wall)
 92 |                 self.grid.set(topX + sizeX - 1, topY + j, wall)
 93 | 
 94 |             # If this isn't the first room, place the entry door
 95 |             if idx > 0:
 96 |                 # Pick a door color different from the previous one
 97 |                 doorColors = set(COLOR_NAMES)
 98 |                 if prevDoorColor:
 99 |                     doorColors.remove(prevDoorColor)
100 |                 # Note: the use of sorting here guarantees determinism,
101 |                 # This is needed because Python's set is not deterministic
102 |                 doorColor = self._rand_elem(sorted(doorColors))
103 | 
104 |                 entryDoor = Door(doorColor)
105 |                 self.grid.set(*room.entryDoorPos, entryDoor)
106 |                 prevDoorColor = doorColor
107 | 
108 |                 prevRoom = roomList[idx-1]
109 |                 prevRoom.exitDoorPos = room.entryDoorPos
110 | 
111 |         # Randomize the starting agent position and direction
112 |         self.place_agent(roomList[0].top, roomList[0].size)
113 | 
114 |         # Place the final goal in the last room
115 |         self.goal_pos = self.place_obj(Goal(), roomList[-1].top, roomList[-1].size)
116 | 
117 |         self.mission = 'traverse the rooms to get to the goal'
118 | 
119 |     def _placeRoom(
120 |         self,
121 |         numLeft,
122 |         roomList,
123 |         minSz,
124 |         maxSz,
125 |         entryDoorWall,
126 |         entryDoorPos
127 |     ):
128 |         # Choose the room size randomly
129 |         sizeX = self._rand_int(minSz, maxSz+1)
130 |         sizeY = self._rand_int(minSz, maxSz+1)
131 | 
132 |         # The first room will be at the door position
133 |         if len(roomList) == 0:
134 |             topX, topY = entryDoorPos
135 |         # Entry on the right
136 |         elif entryDoorWall == 0:
137 |             topX = entryDoorPos[0] - sizeX + 1
138 |             y = entryDoorPos[1]
139 |             topY = self._rand_int(y - sizeY + 2, y)
140 |         # Entry wall on the south
141 |         elif entryDoorWall == 1:
142 |             x = entryDoorPos[0]
143 |             topX = self._rand_int(x - sizeX + 2, x)
144 |             topY = entryDoorPos[1] - sizeY + 1
145 |         # Entry wall on the left
146 |         elif entryDoorWall == 2:
147 |             topX = entryDoorPos[0]
148 |             y = entryDoorPos[1]
149 |             topY = self._rand_int(y - sizeY + 2, y)
150 |         # Entry wall on the top
151 |         elif entryDoorWall == 3:
152 |             x = entryDoorPos[0]
153 |             topX = self._rand_int(x - sizeX + 2, x)
154 |             topY = entryDoorPos[1]
155 |         else:
156 |             assert False, entryDoorWall
157 | 
158 |         # If the room is out of the grid, can't place a room here
159 |         if topX < 0 or topY < 0:
160 |             return False
161 |         if topX + sizeX > self.width or topY + sizeY >= self.height:
162 |             return False
163 | 
164 |         # If the room intersects with previous rooms, can't place it here
165 |         for room in roomList[:-1]:
166 |             nonOverlap = \
167 |                 topX + sizeX < room.top[0] or \
168 |                 room.top[0] + room.size[0] <= topX or \
169 |                 topY + sizeY < room.top[1] or \
170 |                 room.top[1] + room.size[1] <= topY
171 | 
172 |             if not nonOverlap:
173 |                 return False
174 | 
175 |         # Add this room to the list
176 |         roomList.append(Room(
177 |             (topX, topY),
178 |             (sizeX, sizeY),
179 |             entryDoorPos,
180 |             None
181 |         ))
182 | 
183 |         # If this was the last room, stop
184 |         if numLeft == 1:
185 |             return True
186 | 
187 |         # Try placing the next room
188 |         for i in range(0, 8):
189 | 
190 |             # Pick which wall to place the out door on
191 |             wallSet = set((0, 1, 2, 3))
192 |             wallSet.remove(entryDoorWall)
193 |             exitDoorWall = self._rand_elem(sorted(wallSet))
194 |             nextEntryWall = (exitDoorWall + 2) % 4
195 | 
196 |             # Pick the exit door position
197 |             # Exit on right wall
198 |             if exitDoorWall == 0:
199 |                 exitDoorPos = (
200 |                     topX + sizeX - 1,
201 |                     topY + self._rand_int(1, sizeY - 1)
202 |                 )
203 |             # Exit on south wall
204 |             elif exitDoorWall == 1:
205 |                 exitDoorPos = (
206 |                     topX + self._rand_int(1, sizeX - 1),
207 |                     topY + sizeY - 1
208 |                 )
209 |             # Exit on left wall
210 |             elif exitDoorWall == 2:
211 |                 exitDoorPos = (
212 |                     topX,
213 |                     topY + self._rand_int(1, sizeY - 1)
214 |                 )
215 |             # Exit on north wall
216 |             elif exitDoorWall == 3:
217 |                 exitDoorPos = (
218 |                     topX + self._rand_int(1, sizeX - 1),
219 |                     topY
220 |                 )
221 |             else:
222 |                 assert False
223 | 
224 |             # Recursively create the other rooms
225 |             success = self._placeRoom(
226 |                 numLeft - 1,
227 |                 roomList=roomList,
228 |                 minSz=minSz,
229 |                 maxSz=maxSz,
230 |                 entryDoorWall=nextEntryWall,
231 |                 entryDoorPos=exitDoorPos
232 |             )
233 | 
234 |             if success:
235 |                 break
236 | 
237 |         return True
238 | 
class MultiRoomEnvN2S4(MultiRoomEnv):
    """MultiRoom environment with exactly 2 rooms of size at most 4."""

    def __init__(self):
        super().__init__(minNumRooms=2, maxNumRooms=2, maxRoomSize=4)
246 | 
class MultiRoomEnvN4S5(MultiRoomEnv):
    """MultiRoom environment with exactly 4 rooms of size at most 5."""

    def __init__(self):
        super().__init__(minNumRooms=4, maxNumRooms=4, maxRoomSize=5)
254 | 
class MultiRoomEnvN6(MultiRoomEnv):
    """MultiRoom environment with exactly 6 rooms and the default maximum room size."""

    def __init__(self):
        super().__init__(minNumRooms=6, maxNumRooms=6)
261 | 
# Register every MultiRoom variant, in the same order as before.
# Each entry is (environment id, entry point).
for _env_id, _entry_point in (
    ('MiniGrid-MultiRoom-N2-S4-v0', 'gym_minigrid.envs:MultiRoomEnvN2S4'),
    ('MiniGrid-MultiRoom-N4-S5-v0', 'gym_minigrid.envs:MultiRoomEnvN4S5'),
    ('MiniGrid-MultiRoom-N6-v0', 'gym_minigrid.envs:MultiRoomEnvN6'),
):
    register(id=_env_id, entry_point=_entry_point)
276 | 


--------------------------------------------------------------------------------
/gym_minigrid/roomgrid.py:
--------------------------------------------------------------------------------
  1 | from .minigrid import *
  2 | 
  3 | def reject_next_to(env, pos):
  4 |     """
  5 |     Function to filter out object positions that are right next to
  6 |     the agent's starting point
  7 |     """
  8 | 
  9 |     sx, sy = env.agent_pos
 10 |     x, y = pos
 11 |     d = abs(sx - x) + abs(sy - y)
 12 |     return d < 2
 13 | 
 14 | class Room:
 15 |     def __init__(
 16 |         self,
 17 |         top,
 18 |         size
 19 |     ):
 20 |         # Top-left corner and size (tuples)
 21 |         self.top = top
 22 |         self.size = size
 23 | 
 24 |         # List of door objects and door positions
 25 |         # Order of the doors is right, down, left, up
 26 |         self.doors = [None] * 4
 27 |         self.door_pos = [None] * 4
 28 | 
 29 |         # List of rooms adjacent to this one
 30 |         # Order of the neighbors is right, down, left, up
 31 |         self.neighbors = [None] * 4
 32 | 
 33 |         # Indicates if this room is behind a locked door
 34 |         self.locked = False
 35 | 
 36 |         # List of objects contained
 37 |         self.objs = []
 38 | 
 39 |     def rand_pos(self, env):
 40 |         topX, topY = self.top
 41 |         sizeX, sizeY = self.size
 42 |         return env._randPos(
 43 |             topX + 1, topX + sizeX - 1,
 44 |             topY + 1, topY + sizeY - 1
 45 |         )
 46 | 
 47 |     def pos_inside(self, x, y):
 48 |         """
 49 |         Check if a position is within the bounds of this room
 50 |         """
 51 | 
 52 |         topX, topY = self.top
 53 |         sizeX, sizeY = self.size
 54 | 
 55 |         if x < topX or y < topY:
 56 |             return False
 57 | 
 58 |         if x >= topX + sizeX or y >= topY + sizeY:
 59 |             return False
 60 | 
 61 |         return True
 62 | 
 63 | class RoomGrid(MiniGridEnv):
 64 |     """
 65 |     Environment with multiple rooms and random objects.
 66 |     This is meant to serve as a base class for other environments.
 67 |     """
 68 | 
 69 |     def __init__(
 70 |         self,
 71 |         room_size=7,
 72 |         num_rows=3,
 73 |         num_cols=3,
 74 |         max_steps=100,
 75 |         seed=0
 76 |     ):
 77 |         assert room_size > 0
 78 |         assert room_size >= 3
 79 |         assert num_rows > 0
 80 |         assert num_cols > 0
 81 |         self.room_size = room_size
 82 |         self.num_rows = num_rows
 83 |         self.num_cols = num_cols
 84 | 
 85 |         height = (room_size - 1) * num_rows + 1
 86 |         width = (room_size - 1) * num_cols + 1
 87 | 
 88 |         # By default, this environment has no mission
 89 |         self.mission = ''
 90 | 
 91 |         super().__init__(
 92 |             width=width,
 93 |             height=height,
 94 |             max_steps=max_steps,
 95 |             see_through_walls=False,
 96 |             seed=seed
 97 |         )
 98 | 
 99 |     def room_from_pos(self, x, y):
100 |         """Get the room a given position maps to"""
101 | 
102 |         assert x >= 0
103 |         assert y >= 0
104 | 
105 |         i = x // (self.room_size-1)
106 |         j = y // (self.room_size-1)
107 | 
108 |         assert i < self.num_cols
109 |         assert j < self.num_rows
110 | 
111 |         return self.room_grid[j][i]
112 | 
113 |     def get_room(self, i, j):
114 |         assert i < self.num_cols
115 |         assert j < self.num_rows
116 |         return self.room_grid[j][i]
117 | 
118 |     def _gen_grid(self, width, height):
119 |         # Create the grid
120 |         self.grid = Grid(width, height)
121 | 
122 |         self.room_grid = []
123 | 
124 |         # For each row of rooms
125 |         for j in range(0, self.num_rows):
126 |             row = []
127 | 
128 |             # For each column of rooms
129 |             for i in range(0, self.num_cols):
130 |                 room = Room(
131 |                     (i * (self.room_size-1), j * (self.room_size-1)),
132 |                     (self.room_size, self.room_size)
133 |                 )
134 |                 row.append(room)
135 | 
136 |                 # Generate the walls for this room
137 |                 self.grid.wall_rect(*room.top, *room.size)
138 | 
139 |             self.room_grid.append(row)
140 | 
141 |         # For each row of rooms
142 |         for j in range(0, self.num_rows):
143 |             # For each column of rooms
144 |             for i in range(0, self.num_cols):
145 |                 room = self.room_grid[j][i]
146 | 
147 |                 x_l, y_l = (room.top[0] + 1, room.top[1] + 1)
148 |                 x_m, y_m = (room.top[0] + room.size[0] - 1, room.top[1] + room.size[1] - 1)
149 | 
150 |                 # Door positions, order is right, down, left, up
151 |                 if i < self.num_cols - 1:
152 |                     room.neighbors[0] = self.room_grid[j][i+1]
153 |                     room.door_pos[0] = (x_m, self._rand_int(y_l, y_m))
154 |                 if j < self.num_rows - 1:
155 |                     room.neighbors[1] = self.room_grid[j+1][i]
156 |                     room.door_pos[1] = (self._rand_int(x_l, x_m), y_m)
157 |                 if i > 0:
158 |                     room.neighbors[2] = self.room_grid[j][i-1]
159 |                     room.door_pos[2] = room.neighbors[2].door_pos[0]
160 |                 if j > 0:
161 |                     room.neighbors[3] = self.room_grid[j-1][i]
162 |                     room.door_pos[3] = room.neighbors[3].door_pos[1]
163 | 
164 |         # The agent starts in the middle, facing right
165 |         self.agent_pos = (
166 |             (self.num_cols // 2) * (self.room_size-1) + (self.room_size // 2),
167 |             (self.num_rows // 2) * (self.room_size-1) + (self.room_size // 2)
168 |         )
169 |         self.agent_dir = 0
170 | 
171 |     def place_in_room(self, i, j, obj):
172 |         """
173 |         Add an existing object to room (i, j)
174 |         """
175 | 
176 |         room = self.get_room(i, j)
177 | 
178 |         pos = self.place_obj(
179 |             obj,
180 |             room.top,
181 |             room.size,
182 |             reject_fn=reject_next_to,
183 |             max_tries=1000
184 |         )
185 | 
186 |         room.objs.append(obj)
187 | 
188 |         return obj, pos
189 | 
190 |     def add_object(self, i, j, kind=None, color=None):
191 |         """
192 |         Add a new object to room (i, j)
193 |         """
194 | 
195 |         if kind == None:
196 |             kind = self._rand_elem(['key', 'ball', 'box'])
197 | 
198 |         if color == None:
199 |             color = self._rand_color()
200 | 
201 |         # TODO: we probably want to add an Object.make helper function
202 |         assert kind in ['key', 'ball', 'box']
203 |         if kind == 'key':
204 |             obj = Key(color)
205 |         elif kind == 'ball':
206 |             obj = Ball(color)
207 |         elif kind == 'box':
208 |             obj = Box(color)
209 | 
210 |         return self.place_in_room(i, j, obj)
211 | 
212 |     def add_door(self, i, j, door_idx=None, color=None, locked=None):
213 |         """
214 |         Add a door to a room, connecting it to a neighbor
215 |         """
216 | 
217 |         room = self.get_room(i, j)
218 | 
219 |         if door_idx == None:
220 |             # Need to make sure that there is a neighbor along this wall
221 |             # and that there is not already a door
222 |             while True:
223 |                 door_idx = self._rand_int(0, 4)
224 |                 if room.neighbors[door_idx] and room.doors[door_idx] is None:
225 |                     break
226 | 
227 |         if color == None:
228 |             color = self._rand_color()
229 | 
230 |         if locked is None:
231 |             locked = self._rand_bool()
232 | 
233 |         assert room.doors[door_idx] is None, "door already exists"
234 | 
235 |         room.locked = locked
236 |         door = Door(color, is_locked=locked)
237 | 
238 |         pos = room.door_pos[door_idx]
239 |         self.grid.set(*pos, door)
240 |         door.cur_pos = pos
241 | 
242 |         neighbor = room.neighbors[door_idx]
243 |         room.doors[door_idx] = door
244 |         neighbor.doors[(door_idx+2) % 4] = door
245 | 
246 |         return door, pos
247 | 
248 |     def remove_wall(self, i, j, wall_idx):
249 |         """
250 |         Remove a wall between two rooms
251 |         """
252 | 
253 |         room = self.get_room(i, j)
254 | 
255 |         assert wall_idx >= 0 and wall_idx < 4
256 |         assert room.doors[wall_idx] is None, "door exists on this wall"
257 |         assert room.neighbors[wall_idx], "invalid wall"
258 | 
259 |         neighbor = room.neighbors[wall_idx]
260 | 
261 |         tx, ty = room.top
262 |         w, h = room.size
263 | 
264 |         # Ordering of walls is right, down, left, up
265 |         if wall_idx == 0:
266 |             for i in range(1, h - 1):
267 |                 self.grid.set(tx + w - 1, ty + i, None)
268 |         elif wall_idx == 1:
269 |             for i in range(1, w - 1):
270 |                 self.grid.set(tx + i, ty + h - 1, None)
271 |         elif wall_idx == 2:
272 |             for i in range(1, h - 1):
273 |                 self.grid.set(tx, ty + i, None)
274 |         elif wall_idx == 3:
275 |             for i in range(1, w - 1):
276 |                 self.grid.set(tx + i, ty, None)
277 |         else:
278 |             assert False, "invalid wall index"
279 | 
280 |         # Mark the rooms as connected
281 |         room.doors[wall_idx] = True
282 |         neighbor.doors[(wall_idx+2) % 4] = True
283 | 
284 |     def place_agent(self, i=None, j=None, rand_dir=True):
285 |         """
286 |         Place the agent in a room
287 |         """
288 | 
289 |         if i == None:
290 |             i = self._rand_int(0, self.num_cols)
291 |         if j == None:
292 |             j = self._rand_int(0, self.num_rows)
293 | 
294 |         room = self.room_grid[j][i]
295 | 
296 |         # Find a position that is not right in front of an object
297 |         while True:
298 |             super().place_agent(room.top, room.size, rand_dir, max_tries=1000)
299 |             front_cell = self.grid.get(*self.front_pos)
300 |             if front_cell is None or front_cell.type is 'wall':
301 |                 break
302 | 
303 |         return self.agent_pos
304 | 
305 |     def connect_all(self, door_colors=COLOR_NAMES, max_itrs=5000):
306 |         """
307 |         Make sure that all rooms are reachable by the agent from its
308 |         starting position
309 |         """
310 | 
311 |         start_room = self.room_from_pos(*self.agent_pos)
312 | 
313 |         added_doors = []
314 | 
315 |         def find_reach():
316 |             reach = set()
317 |             stack = [start_room]
318 |             while len(stack) > 0:
319 |                 room = stack.pop()
320 |                 if room in reach:
321 |                     continue
322 |                 reach.add(room)
323 |                 for i in range(0, 4):
324 |                     if room.doors[i]:
325 |                         stack.append(room.neighbors[i])
326 |             return reach
327 | 
328 |         num_itrs = 0
329 | 
330 |         while True:
331 |             # This is to handle rare situations where random sampling produces
332 |             # a level that cannot be connected, producing in an infinite loop
333 |             if num_itrs > max_itrs:
334 |                 raise RecursionError('connect_all failed')
335 |             num_itrs += 1
336 | 
337 |             # If all rooms are reachable, stop
338 |             reach = find_reach()
339 |             if len(reach) == self.num_rows * self.num_cols:
340 |                 break
341 | 
342 |             # Pick a random room and door position
343 |             i = self._rand_int(0, self.num_cols)
344 |             j = self._rand_int(0, self.num_rows)
345 |             k = self._rand_int(0, 4)
346 |             room = self.get_room(i, j)
347 | 
348 |             # If there is already a door there, skip
349 |             if not room.door_pos[k] or room.doors[k]:
350 |                 continue
351 | 
352 |             if room.locked or room.neighbors[k].locked:
353 |                 continue
354 | 
355 |             color = self._rand_elem(door_colors)
356 |             door, _ = self.add_door(i, j, k, color, False)
357 |             added_doors.append(door)
358 | 
359 |         return added_doors
360 | 
361 |     def add_distractors(self, i=None, j=None, num_distractors=10, all_unique=True):
362 |         """
363 |         Add random objects that can potentially distract/confuse the agent.
364 |         """
365 | 
366 |         # Collect a list of existing objects
367 |         objs = []
368 |         for row in self.room_grid:
369 |             for room in row:
370 |                 for obj in room.objs:
371 |                     objs.append((obj.type, obj.color))
372 | 
373 |         # List of distractors added
374 |         dists = []
375 | 
376 |         while len(dists) < num_distractors:
377 |             color = self._rand_elem(COLOR_NAMES)
378 |             type = self._rand_elem(['key', 'ball', 'box'])
379 |             obj = (type, color)
380 | 
381 |             if all_unique and obj in objs:
382 |                 continue
383 | 
384 |             # Add the object to a random room if no room specified
385 |             room_i = i
386 |             room_j = j
387 |             if room_i == None:
388 |                 room_i = self._rand_int(0, self.num_cols)
389 |             if room_j == None:
390 |                 room_j = self._rand_int(0, self.num_rows)
391 | 
392 |             dist, pos = self.add_object(room_i, room_j, *obj)
393 | 
394 |             objs.append(obj)
395 |             dists.append(dist)
396 | 
397 |         return dists
398 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Minimalistic Gridworld Environment (MiniGrid)
  2 | 
  3 | [![Build Status](https://travis-ci.org/maximecb/gym-minigrid.svg?branch=master)](https://travis-ci.org/maximecb/gym-minigrid)
  4 | 
  5 | There are other gridworld Gym environments out there, but this one is
  6 | designed to be particularly simple, lightweight and fast. The code has very few
  7 | dependencies, making it less likely to break or fail to install. It loads no
  8 | external sprites/textures, and it can run at up to 5000 FPS on a Core i7
  9 | laptop, which means you can run your experiments faster. A known-working RL
 10 | implementation can be found [in this repository](https://github.com/lcswillems/torch-rl).
 11 | 
 12 | Requirements:
 13 | - Python 3.5+
 14 | - OpenAI Gym
 15 | - NumPy
 16 | - PyQT 5 for graphics
 17 | 
 18 | Please use this bibtex if you want to cite this repository in your publications:
 19 | 
 20 | ```
 21 | @misc{gym_minigrid,
 22 |   author = {Chevalier-Boisvert, Maxime and Willems, Lucas and Pal, Suman},
 23 |   title = {Minimalistic Gridworld Environment for OpenAI Gym},
 24 |   year = {2018},
 25 |   publisher = {GitHub},
 26 |   journal = {GitHub repository},
 27 |   howpublished = {\url{https://github.com/maximecb/gym-minigrid}},
 28 | }
 29 | ```
 30 | 
 31 | List of publications & submissions using MiniGrid (please open a pull request to add missing entries):
 32 | - [Learning Effective Subgoals with Multi-Task Hierarchical Reinforcement Learning](http://surl.tirl.info/proceedings/SURL-2019_paper_10.pdf) (Tsinghua University, August 2019)
 33 | - [Learning distant cause and effect using only local and immediate credit assignment](https://arxiv.org/abs/1905.11589) (Incubator 491, May 2019)
 34 | - [Learning World Graphs to Accelerate Hierarchical Reinforcement Learning](https://arxiv.org/abs/1907.00664) (Salesforce Research, 2019)
 35 | - [Modeling the Long Term Future in Model-Based Reinforcement Learning](https://openreview.net/forum?id=SkgQBn0cF7) (Mila, ICLR 2019)
 36 | - [Practical Open-Loop Optimistic Planning](https://arxiv.org/pdf/1904.04700.pdf) (INRIA, Apr 2019)
 37 | - [Unifying Ensemble Methods for Q-learning via Social Choice Theory](https://arxiv.org/pdf/1902.10646.pdf) (Max Planck Institute, Feb 2019)
 38 | - [Planning Beyond The Sensing Horizon Using a Learned Context](https://personalrobotics.cs.washington.edu/workshops/mlmp2018/assets/docs/18_CameraReadySubmission.pdf) (MLMP@IROS, 2018)
 39 | - [Guiding Policies with Language via Meta-Learning](https://arxiv.org/abs/1811.07882) (UC Berkeley, Nov 2018)
 40 | - [On the Complexity of Exploration in Goal-Driven Navigation](https://arxiv.org/abs/1811.06889) (CMU, NIPS, Nov 2018)
 41 | - [Transfer and Exploration via the Information Bottleneck](https://openreview.net/forum?id=rJg8yhAqKm) (Mila, Nov 2018)
 42 | - [Modeling the Long Term Future in Model-Based Reinforcement Learning](https://openreview.net/forum?id=SkgQBn0cF7) (Nov 2018)
 43 | - [Learning of Sophisticated Curriculums by viewing them as Graphs over Tasks](https://openreview.net/forum?id=rJlGdsC9Ym) (ICLR, Nov 2018, withdrawn)
 44 | - [BabyAI: First Steps Towards Grounded Language Learning With a Human In the Loop](https://arxiv.org/abs/1810.08272) (Mila, Oct 2018)
 45 | 
 46 | This environment has been built as part of work done at the [MILA](https://mila.quebec/en/). The Dynamic obstacles environment has been added as part of work done at [IAS in TU Darmstadt](https://www.ias.informatik.tu-darmstadt.de/) and the University of Genoa for mobile robot navigation with dynamic obstacles.
 47 | 
 48 | ## Installation
 49 | 
 50 | There is now a [pip package](https://pypi.org/project/gym-minigrid/) available, which is updated periodically:
 51 | 
 52 | ```
 53 | pip3 install gym-minigrid
 54 | ```
 55 | 
 56 | Alternatively, to get the latest version of MiniGrid, you can clone this repository and install the dependencies with `pip3`:
 57 | 
 58 | ```
 59 | git clone https://github.com/maximecb/gym-minigrid.git
 60 | cd gym-minigrid
 61 | pip3 install -e .
 62 | ```
 63 | 
 64 | ## Basic Usage
 65 | 
 66 | There is a UI application which allows you to manually control the agent with the arrow keys:
 67 | 
 68 | ```
 69 | ./manual_control.py
 70 | ```
 71 | 
 72 | The environment being run can be selected with the `--env-name` option, eg:
 73 | 
 74 | ```
 75 | ./manual_control.py --env-name MiniGrid-Empty-8x8-v0
 76 | ```
 77 | 
 78 | ## Reinforcement Learning
 79 | 
 80 | If you want to train an agent with reinforcement learning, I recommend using the code found in the [torch-rl](https://github.com/lcswillems/torch-rl) repository. This code has been tested and is known to work with this environment. The default hyper-parameters are also known to converge.
 81 | 
 82 | A sample training command is:
 83 | 
 84 | ```
 85 | cd torch-rl
 86 | python3 -m scripts.train --env MiniGrid-Empty-8x8-v0 --algo ppo
 87 | ```
 88 | 
 89 | ## Design
 90 | 
 91 | MiniGrid is built to support tasks involving natural language and sparse rewards.
 92 | The observations are dictionaries, with an 'image' field containing a partially
 93 | observable view of the environment, a 'mission' field which is a textual string
 94 | describing the objective the agent should reach to get a reward, and a 'direction'
 95 | field which can be used as an optional compass. Using dictionaries makes it
 96 | easy for you to add additional information to observations
 97 | if you need to, without having to force everything into a single tensor.
 98 | If your RL code expects one single tensor for observations, please take a look at
 99 | `FlatObsWrapper` in
100 | [gym_minigrid/wrappers.py](/gym_minigrid/wrappers.py).
101 | 
102 | The partially observable view of the environment uses a compact and efficient
103 | encoding, with just 3 input values per visible grid cell, 7x7x3 values total.
104 | If you want to obtain an array of RGB pixels instead, see the `get_obs_render` method in
105 | [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py).
106 | 
107 | Structure of the world:
108 | - The world is an NxM grid of tiles
109 | - Each tile in the grid world contains zero or one object
110 |   - Cells that do not contain an object have the value `None`
111 | - Each object has an associated discrete color (string)
112 | - Each object has an associated type (string)
113 |   - Provided object types are: wall, floor, lava, door, key, ball, box and goal
114 | - The agent can pick up and carry exactly one object (eg: ball or key)
115 | - To open a locked door, the agent has to be carrying a key matching the door's color
116 | 
117 | Actions in the basic environment:
118 | - Turn left
119 | - Turn right
120 | - Move forward
121 | - Pick up an object
122 | - Drop the object being carried
123 | - Toggle (open doors, interact with objects)
124 | - Done (task completed, optional)
125 | 
126 | By default, sparse rewards are given for reaching a green goal tile. A
127 | reward of 1 is given for success, and zero for failure. There is also an
128 | environment-specific time step limit for completing the task.
129 | You can define your own reward function by creating a class derived
130 | from `MiniGridEnv`. Extending the environment with new object types or actions
131 | should be very easy. If you wish to do this, you should take a look at the
132 | [gym_minigrid/minigrid.py](gym_minigrid/minigrid.py) source file.
133 | 
134 | ## Included Environments
135 | 
136 | The environments listed below are implemented in the [gym_minigrid/envs](/gym_minigrid/envs) directory.
137 | Each environment provides one or more configurations registered with OpenAI gym. Each environment
138 | is also programmatically tunable in terms of size/complexity, which is useful for curriculum learning
139 | or to fine-tune difficulty.
140 | 
141 | ### Empty environment
142 | 
143 | Registered configurations:
144 | - `MiniGrid-Empty-5x5-v0`
145 | - `MiniGrid-Empty-Random-5x5-v0`
146 | - `MiniGrid-Empty-6x6-v0`
147 | - `MiniGrid-Empty-Random-6x6-v0`
148 | - `MiniGrid-Empty-8x8-v0`
149 | - `MiniGrid-Empty-16x16-v0`
150 | 
151 | <p align="center">
152 | <img src="/figures/empty-env.png" width=250>
153 | </p>
154 | 
155 | This environment is an empty room, and the goal of the agent is to reach the
156 | green goal square, which provides a sparse reward. A small penalty is
157 | subtracted for the number of steps to reach the goal. This environment is
158 | useful, with small rooms, to validate that your RL algorithm works correctly,
159 | and with large rooms to experiment with sparse rewards and exploration.
160 | The random variants of the environment have the agent starting at a random
161 | position for each episode, while the regular variants have the agent always
162 | starting in the corner opposite to the goal.
163 | 
164 | ### Four rooms environment
165 | 
166 | Registered configurations:
167 | - `MiniGrid-FourRooms-v0`
168 | 
169 | <p align="center">
170 | <img src="/figures/four-rooms-env.png" width=380>
171 | </p>
172 | 
173 | Classic four room reinforcement learning environment. The agent must navigate
174 | in a maze composed of four rooms interconnected by 4 gaps in the walls. To
175 | obtain a reward, the agent must reach the green goal square. Both the agent
176 | and the goal square are randomly placed in any of the four rooms.
177 | 
178 | ### Door & key environment
179 | 
180 | Registered configurations:
181 | - `MiniGrid-DoorKey-5x5-v0`
182 | - `MiniGrid-DoorKey-6x6-v0`
183 | - `MiniGrid-DoorKey-8x8-v0`
184 | - `MiniGrid-DoorKey-16x16-v0`
185 | 
186 | <p align="center">
187 | <img src="/figures/door-key-env.png">
188 | </p>
189 | 
190 | This environment has a key that the agent must pick up in order to unlock
191 | a door and then get to the green goal square. This environment is difficult,
192 | because of the sparse reward, to solve using classical RL algorithms. It is
193 | useful to experiment with curiosity or curriculum learning.
194 | 
195 | ### Multi-room environment
196 | 
197 | Registered configurations:
198 | - `MiniGrid-MultiRoom-N2-S4-v0` (two small rooms)
199 | - `MiniGrid-MultiRoom-N4-S5-v0` (four rooms)
200 | - `MiniGrid-MultiRoom-N6-v0` (six rooms)
201 | 
202 | <p align="center">
203 | <img src="/figures/multi-room.gif" width=416 height=424>
204 | </p>
205 | 
206 | This environment has a series of connected rooms with doors that must be
207 | opened in order to get to the next room. The final room has the green goal
208 | square the agent must get to. This environment is extremely difficult to
209 | solve using RL alone. However, by gradually increasing the number of
210 | rooms and building a curriculum, the environment can be solved.
211 | 
212 | ### Fetch environment
213 | 
214 | Registered configurations:
215 | - `MiniGrid-Fetch-5x5-N2-v0`
216 | - `MiniGrid-Fetch-6x6-N2-v0`
217 | - `MiniGrid-Fetch-8x8-N3-v0`
218 | 
219 | <p align="center">
220 | <img src="/figures/fetch-env.png" width=450>
221 | </p>
222 | 
223 | This environment has multiple objects of assorted types and colors. The
224 | agent receives a textual string as part of its observation telling it
225 | which object to pick up. Picking up the wrong object produces a negative
226 | reward.
227 | 
228 | ### Go-to-door environment
229 | 
230 | Registered configurations:
231 | - `MiniGrid-GoToDoor-5x5-v0`
232 | - `MiniGrid-GoToDoor-6x6-v0`
233 | - `MiniGrid-GoToDoor-8x8-v0`
234 | 
235 | <p align="center">
236 | <img src="/figures/gotodoor-6x6.png" width=400>
237 | </p>
238 | 
239 | This environment is a room with four doors, one on each wall. The agent
240 | receives a textual (mission) string as input, telling it which door to go to,
241 | (eg: "go to the red door"). It receives a positive reward for performing the
242 | `done` action next to the correct door, as indicated in the mission string.
243 | 
244 | ### Put-near environment
245 | 
246 | Registered configurations:
247 | - `MiniGrid-PutNear-6x6-N2-v0`
248 | - `MiniGrid-PutNear-8x8-N3-v0`
249 | 
250 | The agent is instructed through a textual string to pick up an object and
251 | place it next to another object. This environment is easy to solve with two
252 | objects, but difficult to solve with more, as it involves both textual
253 | understanding and spatial reasoning involving multiple objects.
254 | 
255 | ### Red and blue doors environment
256 | 
257 | Registered configurations:
258 | - `MiniGrid-RedBlueDoors-6x6-v0`
259 | - `MiniGrid-RedBlueDoors-8x8-v0`
260 | 
261 | The purpose of this environment is to test memory.
262 | The agent is randomly placed within a room with one red and one blue door
263 | facing opposite directions. The agent has to open the red door and then open
264 | the blue door, in that order.  The agent, when facing one door, cannot see
265 | the door behind him. Hence, the agent needs to remember whether or not he has
266 | previously opened the other door in order to reliably succeed at completing
267 | the task.
268 | 
269 | ### Memory environment
270 | 
271 | Registered configurations:
272 | - `MiniGrid-MemoryS17Random-v0`
273 | - `MiniGrid-MemoryS13Random-v0`
274 | - `MiniGrid-MemoryS13-v0`
275 | - `MiniGrid-MemoryS11-v0`
276 | - `MiniGrid-MemoryS9-v0`
277 | - `MiniGrid-MemoryS7-v0`
278 | 
279 | This environment is a memory test. The agent starts in a small room
280 | where it sees an object. It then has to go through a narrow hallway
281 | which ends in a split. At each end of the split there is an object,
282 | one of which is the same as the object in the starting room. The
283 | agent has to remember the initial object, and go to the matching
284 | object at split.
285 | 
286 | ### Locked room environment
287 | 
288 | Registered configurations:
289 | - `MiniGrid-LockedRoom-v0`
290 | 
291 | The environment has six rooms, one of which is locked. The agent receives
292 | a textual mission string as input, telling it which room to go to in order
293 | to get the key that opens the locked room. It then has to go into the locked
294 | room in order to reach the final goal. This environment is extremely difficult
295 | to solve with vanilla reinforcement learning alone.
296 | 
297 | ### Key corridor environment
298 | 
299 | Registered configurations:
300 | - `MiniGrid-KeyCorridorS3R1-v0`
301 | - `MiniGrid-KeyCorridorS3R2-v0`
302 | - `MiniGrid-KeyCorridorS3R3-v0`
303 | - `MiniGrid-KeyCorridorS4R3-v0`
304 | - `MiniGrid-KeyCorridorS5R3-v0`
305 | - `MiniGrid-KeyCorridorS6R3-v0`
306 | 
307 | <p align="center">
308 |     <img src="figures/KeyCorridorS3R1.png" width="250">
309 |     <img src="figures/KeyCorridorS3R2.png" width="250">
310 |     <img src="figures/KeyCorridorS3R3.png" width="250">
311 |     <img src="figures/KeyCorridorS4R3.png" width="250">
312 |     <img src="figures/KeyCorridorS5R3.png" width="250">
313 |     <img src="figures/KeyCorridorS6R3.png" width="250">
314 | </p>
315 | 
316 | This environment is similar to the locked room environment, but there are
317 | multiple registered environment configurations of increasing size,
318 | making it easier to use curriculum learning to train an agent to solve it.
319 | The agent has to pick up an object which is behind a locked door. The key is
320 | hidden in another room, and the agent has to explore the environment to find
321 | it. The mission string does not give the agent any clues as to where the
322 | key is placed. This environment can be solved without relying on language.
323 | 
324 | ### Unlock environment
325 | 
326 | Registered configurations:
327 | - `MiniGrid-Unlock-v0`
328 | 
329 | <p align="center">
330 |     <img src="figures/Unlock.png" width="200">
331 | </p>
332 | 
333 | The agent has to open a locked door. This environment can be solved without
334 | relying on language.
335 | 
336 | ### Unlock pickup environment
337 | 
338 | Registered configurations:
339 | - `MiniGrid-UnlockPickup-v0`
340 | 
341 | <p align="center">
342 |     <img src="figures/UnlockPickup.png" width="250">
343 | </p>
344 | 
345 | The agent has to pick up a box which is placed in another room, behind a
346 | locked door. This environment can be solved without relying on language.
347 | 
348 | ### Blocked unlock pickup environment
349 | 
350 | Registered configurations:
351 | - `MiniGrid-BlockedUnlockPickup-v0`
352 | 
353 | <p align="center">
354 |     <img src="figures/BlockedUnlockPickup.png" width="250">
355 | </p>
356 | 
357 | The agent has to pick up a box which is placed in another room, behind a
358 | locked door. The door is also blocked by a ball which the agent has to move
359 | before it can unlock the door. Hence, the agent has to learn to move the ball,
360 | pick up the key, open the door and pick up the object in the other room.
361 | This environment can be solved without relying on language.
362 | 
363 | ## Obstructed maze environment
364 | 
365 | Registered configurations:
366 | - `MiniGrid-ObstructedMaze-1Dl-v0`
367 | - `MiniGrid-ObstructedMaze-1Dlh-v0`
368 | - `MiniGrid-ObstructedMaze-1Dlhb-v0`
369 | - `MiniGrid-ObstructedMaze-2Dl-v0`
370 | - `MiniGrid-ObstructedMaze-2Dlh-v0`
371 | - `MiniGrid-ObstructedMaze-2Dlhb-v0`
372 | - `MiniGrid-ObstructedMaze-1Q-v0`
373 | - `MiniGrid-ObstructedMaze-2Q-v0`
374 | - `MiniGrid-ObstructedMaze-Full-v0`
375 | 
376 | <p align="center">
377 |   <img src="figures/ObstructedMaze-1Dl.png" width="250">
378 |   <img src="figures/ObstructedMaze-1Dlh.png" width="250">
379 |   <img src="figures/ObstructedMaze-1Dlhb.png" width="250">
380 |   <img src="figures/ObstructedMaze-2Dl.png" width="100">
381 |   <img src="figures/ObstructedMaze-2Dlh.png" width="100">
382 |   <img src="figures/ObstructedMaze-2Dlhb.png" width="100">
383 |   <img src="figures/ObstructedMaze-1Q.png" width="250">
384 |   <img src="figures/ObstructedMaze-2Q.png" width="250">
385 |   <img src="figures/ObstructedMaze-4Q.png" width="250">
386 | </p>
387 | 
388 | The agent has to pick up a box which is placed in a corner of a 3x3 maze.
389 | The doors are locked, the keys are hidden in boxes and doors are obstructed
390 | by balls. This environment can be solved without relying on language.
391 | 
392 | The agent has to pick up a box which is placed in a corner of a 3x3 maze.
393 | The doors are locked, the keys are hidden in boxes and doors are obstructed
394 | by balls. This environment can be solved without relying on language.
395 | 
396 | ## Lava crossing environment
397 | 
398 | Registered configurations:
399 | - `MiniGrid-LavaCrossingS9N1-v0`
400 | - `MiniGrid-LavaCrossingS9N2-v0`
401 | - `MiniGrid-LavaCrossingS9N3-v0`
402 | - `MiniGrid-LavaCrossingS11N5-v0`
403 | 
404 | <p align="center">
405 |   <img src="figures/LavaCrossingS9N1.png" width="200">
406 |   <img src="figures/LavaCrossingS9N2.png" width="200">
407 |   <img src="figures/LavaCrossingS9N3.png" width="200">
408 |   <img src="figures/LavaCrossingS11N5.png" width="250">
409 | </p>
410 | 
411 | The agent has to reach the green goal square on the other corner of the room
412 | while avoiding rivers of deadly lava which terminate the episode in failure.
413 | Each lava stream runs across the room either horizontally or vertically, and
414 | has a single crossing point which can be safely used. Luckily, a path to the
415 | goal is guaranteed to exist. This environment is useful for studying safety and
416 | safe exploration.
417 | 
418 | ## Distributional shift environment
419 | 
420 | Registered configurations:
421 | - `MiniGrid-DistShift1-v0`
422 | - `MiniGrid-DistShift2-v0`
423 | 
424 | This environment is based on one of the DeepMind [AI safety gridworlds](https://github.com/deepmind/ai-safety-gridworlds).
425 | The agent starts in the top-left corner and must reach the goal which is in the top-right corner, but has to avoid stepping
426 | into lava on its way. The aim of this environment is to test an agent's ability to generalize. There are two slightly
427 | different variants of the environment, so that the agent can be trained on one variant and tested on the other.
428 | 
429 | <p align="center">
430 |   <img src="figures/DistShift1.png" width="200">
431 |   <img src="figures/DistShift2.png" width="200">
432 | </p>
433 | 
434 | ## Simple crossing environment
435 | 
436 | Registered configurations:
437 | - `MiniGrid-SimpleCrossingS9N1-v0`
438 | - `MiniGrid-SimpleCrossingS9N2-v0`
439 | - `MiniGrid-SimpleCrossingS9N3-v0`
440 | - `MiniGrid-SimpleCrossingS11N5-v0`
441 | 
442 | <p align="center">
443 |   <img src="figures/SimpleCrossingS9N1.png" width="200">
444 |   <img src="figures/SimpleCrossingS9N2.png" width="200">
445 |   <img src="figures/SimpleCrossingS9N3.png" width="200">
446 |   <img src="figures/SimpleCrossingS11N5.png" width="250">
447 | </p>
448 | 
449 | Similar to the `LavaCrossing` environment, the agent has to reach the green
450 | goal square on the other corner of the room, however lava is replaced by
451 | walls. This MDP is therefore much easier and may be useful for quickly
452 | testing your algorithms.
453 | 
454 | ### Dynamic obstacles environment
455 | 
456 | Registered configurations:
457 | - `MiniGrid-Dynamic-Obstacles-5x5-v0`
458 | - `MiniGrid-Dynamic-Obstacles-Random-5x5-v0`
459 | - `MiniGrid-Dynamic-Obstacles-6x6-v0`
460 | - `MiniGrid-Dynamic-Obstacles-Random-6x6-v0`
461 | - `MiniGrid-Dynamic-Obstacles-8x8-v0`
462 | - `MiniGrid-Dynamic-Obstacles-16x16-v0`
463 | 
464 | <p align="center">
465 | <img src="/figures/dynamic_obstacles.gif">
466 | </p>
467 | 
468 | This environment is an empty room with moving obstacles. The goal of the agent is to reach the green goal square without colliding with any obstacle. A large penalty is subtracted if the agent collides with an obstacle and the episode finishes. This environment is useful to test Dynamic Obstacle Avoidance for mobile robots with Reinforcement Learning in Partial Observability.
469 | 


--------------------------------------------------------------------------------
/gym_minigrid/minigrid.py:
--------------------------------------------------------------------------------
   1 | import math
   2 | import gym
   3 | from enum import IntEnum
   4 | import numpy as np
   5 | from gym import error, spaces, utils
   6 | from gym.utils import seeding
   7 | 
   8 | # Size in pixels of a cell in the full-scale human view
   9 | CELL_PIXELS = 32
  10 | 
  11 | # Map of color names to RGB values
  12 | COLORS = {
  13 |     'red'   : np.array([255, 0, 0]),
  14 |     'green' : np.array([0, 255, 0]),
  15 |     'blue'  : np.array([0, 0, 255]),
  16 |     'purple': np.array([112, 39, 195]),
  17 |     'yellow': np.array([255, 255, 0]),
  18 |     'grey'  : np.array([100, 100, 100])
  19 | }
  20 | 
  21 | COLOR_NAMES = sorted(list(COLORS.keys()))
  22 | 
  23 | # Used to map colors to integers
  24 | COLOR_TO_IDX = {
  25 |     'red'   : 0,
  26 |     'green' : 1,
  27 |     'blue'  : 2,
  28 |     'purple': 3,
  29 |     'yellow': 4,
  30 |     'grey'  : 5
  31 | }
  32 | 
  33 | IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys()))
  34 | 
  35 | # Map of object type to integers
  36 | OBJECT_TO_IDX = {
  37 |     'unseen'        : 0,
  38 |     'empty'         : 1,
  39 |     'wall'          : 2,
  40 |     'floor'         : 3,
  41 |     'door'          : 4,
  42 |     'key'           : 5,
  43 |     'ball'          : 6,
  44 |     'box'           : 7,
  45 |     'goal'          : 8,
  46 |     'lava'          : 9,
  47 |     'agent'         : 10,
  48 | }
  49 | 
  50 | IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys()))
  51 | 
  52 | # Map of agent direction indices to vectors
  53 | DIR_TO_VEC = [
  54 |     # Pointing right (positive X)
  55 |     np.array((1, 0)),
  56 |     # Down (positive Y)
  57 |     np.array((0, 1)),
  58 |     # Pointing left (negative X)
  59 |     np.array((-1, 0)),
  60 |     # Up (negative Y)
  61 |     np.array((0, -1)),
  62 | ]
  63 | 
  64 | class WorldObj:
  65 |     """
  66 |     Base class for grid world objects
  67 |     """
  68 | 
  69 |     def __init__(self, type, color):
  70 |         assert type in OBJECT_TO_IDX, type
  71 |         assert color in COLOR_TO_IDX, color
  72 |         self.type = type
  73 |         self.color = color
  74 |         self.contains = None
  75 | 
  76 |         # Initial position of the object
  77 |         self.init_pos = None
  78 | 
  79 |         # Current position of the object
  80 |         self.cur_pos = None
  81 | 
  82 |     def can_overlap(self):
  83 |         """Can the agent overlap with this?"""
  84 |         return False
  85 | 
  86 |     def can_pickup(self):
  87 |         """Can the agent pick this up?"""
  88 |         return False
  89 | 
  90 |     def can_contain(self):
  91 |         """Can this contain another object?"""
  92 |         return False
  93 | 
  94 |     def see_behind(self):
  95 |         """Can the agent see behind this object?"""
  96 |         return True
  97 | 
  98 |     def toggle(self, env, pos):
  99 |         """Method to trigger/toggle an action this object performs"""
 100 |         return False
 101 | 
 102 |     def render(self, r):
 103 |         """Draw this object with the given renderer"""
 104 |         raise NotImplementedError
 105 | 
 106 |     def _set_color(self, r):
 107 |         """Set the color of this object as the active drawing color"""
 108 |         c = COLORS[self.color]
 109 |         r.setLineColor(c[0], c[1], c[2])
 110 |         r.setColor(c[0], c[1], c[2])
 111 | 
 112 | class Goal(WorldObj):
 113 |     def __init__(self):
 114 |         super().__init__('goal', 'green')
 115 | 
 116 |     def can_overlap(self):
 117 |         return True
 118 | 
 119 |     def render(self, r):
 120 |         self._set_color(r)
 121 |         r.drawPolygon([
 122 |             (0          , CELL_PIXELS),
 123 |             (CELL_PIXELS, CELL_PIXELS),
 124 |             (CELL_PIXELS,           0),
 125 |             (0          ,           0)
 126 |         ])
 127 | 
 128 | class Floor(WorldObj):
 129 |     """
 130 |     Colored floor tile the agent can walk over
 131 |     """
 132 | 
 133 |     def __init__(self, color='blue'):
 134 |         super().__init__('floor', color)
 135 | 
 136 |     def can_overlap(self):
 137 |         return True
 138 | 
 139 |     def render(self, r):
 140 |         # Give the floor a pale color
 141 |         c = COLORS[self.color]
 142 |         r.setLineColor(100, 100, 100, 0)
 143 |         r.setColor(*c/2)
 144 |         r.drawPolygon([
 145 |             (1          , CELL_PIXELS),
 146 |             (CELL_PIXELS, CELL_PIXELS),
 147 |             (CELL_PIXELS,           1),
 148 |             (1          ,           1)
 149 |         ])
 150 | 
 151 | class Lava(WorldObj):
 152 |     def __init__(self):
 153 |         super().__init__('lava', 'red')
 154 | 
 155 |     def can_overlap(self):
 156 |         return True
 157 | 
 158 |     def render(self, r):
 159 |         orange = 255, 128, 0
 160 |         r.setLineColor(*orange)
 161 |         r.setColor(*orange)
 162 |         r.drawPolygon([
 163 |             (0          , CELL_PIXELS),
 164 |             (CELL_PIXELS, CELL_PIXELS),
 165 |             (CELL_PIXELS, 0),
 166 |             (0          , 0)
 167 |         ])
 168 | 
 169 |         # drawing the waves
 170 |         r.setLineColor(0, 0, 0)
 171 | 
 172 |         r.drawPolyline([
 173 |             (.1 * CELL_PIXELS, .3 * CELL_PIXELS),
 174 |             (.3 * CELL_PIXELS, .4 * CELL_PIXELS),
 175 |             (.5 * CELL_PIXELS, .3 * CELL_PIXELS),
 176 |             (.7 * CELL_PIXELS, .4 * CELL_PIXELS),
 177 |             (.9 * CELL_PIXELS, .3 * CELL_PIXELS),
 178 |         ])
 179 | 
 180 |         r.drawPolyline([
 181 |             (.1 * CELL_PIXELS, .5 * CELL_PIXELS),
 182 |             (.3 * CELL_PIXELS, .6 * CELL_PIXELS),
 183 |             (.5 * CELL_PIXELS, .5 * CELL_PIXELS),
 184 |             (.7 * CELL_PIXELS, .6 * CELL_PIXELS),
 185 |             (.9 * CELL_PIXELS, .5 * CELL_PIXELS),
 186 |         ])
 187 | 
 188 |         r.drawPolyline([
 189 |             (.1 * CELL_PIXELS, .7 * CELL_PIXELS),
 190 |             (.3 * CELL_PIXELS, .8 * CELL_PIXELS),
 191 |             (.5 * CELL_PIXELS, .7 * CELL_PIXELS),
 192 |             (.7 * CELL_PIXELS, .8 * CELL_PIXELS),
 193 |             (.9 * CELL_PIXELS, .7 * CELL_PIXELS),
 194 |         ])
 195 | 
 196 | class Wall(WorldObj):
 197 |     def __init__(self, color='grey'):
 198 |         super().__init__('wall', color)
 199 | 
 200 |     def see_behind(self):
 201 |         return False
 202 | 
 203 |     def render(self, r):
 204 |         self._set_color(r)
 205 |         r.drawPolygon([
 206 |             (0          , CELL_PIXELS),
 207 |             (CELL_PIXELS, CELL_PIXELS),
 208 |             (CELL_PIXELS,           0),
 209 |             (0          ,           0)
 210 |         ])
 211 | 
 212 | class Door(WorldObj):
 213 |     def __init__(self, color, is_open=False, is_locked=False):
 214 |         super().__init__('door', color)
 215 |         self.is_open = is_open
 216 |         self.is_locked = is_locked
 217 | 
 218 |     def can_overlap(self):
 219 |         """The agent can only walk over this cell when the door is open"""
 220 |         return self.is_open
 221 | 
 222 |     def see_behind(self):
 223 |         return self.is_open
 224 | 
 225 |     def toggle(self, env, pos):
 226 |         # If the player has the right key to open the door
 227 |         if self.is_locked:
 228 |             if isinstance(env.carrying, Key) and env.carrying.color == self.color:
 229 |                 self.is_locked = False
 230 |                 self.is_open = True
 231 |                 return True
 232 |             return False
 233 | 
 234 |         self.is_open = not self.is_open
 235 |         return True
 236 | 
 237 |     def render(self, r):
 238 |         c = COLORS[self.color]
 239 |         r.setLineColor(c[0], c[1], c[2])
 240 |         r.setColor(c[0], c[1], c[2], 50 if self.is_locked else 0)
 241 | 
 242 |         if self.is_open:
 243 |             r.drawPolygon([
 244 |                 (CELL_PIXELS-2, CELL_PIXELS),
 245 |                 (CELL_PIXELS  , CELL_PIXELS),
 246 |                 (CELL_PIXELS  ,           0),
 247 |                 (CELL_PIXELS-2,           0)
 248 |             ])
 249 |             return
 250 | 
 251 |         r.drawPolygon([
 252 |             (0          , CELL_PIXELS),
 253 |             (CELL_PIXELS, CELL_PIXELS),
 254 |             (CELL_PIXELS,           0),
 255 |             (0          ,           0)
 256 |         ])
 257 |         r.drawPolygon([
 258 |             (2            , CELL_PIXELS-2),
 259 |             (CELL_PIXELS-2, CELL_PIXELS-2),
 260 |             (CELL_PIXELS-2,           2),
 261 |             (2            ,           2)
 262 |         ])
 263 | 
 264 |         if self.is_locked:
 265 |             # Draw key slot
 266 |             r.drawLine(
 267 |                 CELL_PIXELS * 0.55,
 268 |                 CELL_PIXELS * 0.5,
 269 |                 CELL_PIXELS * 0.75,
 270 |                 CELL_PIXELS * 0.5
 271 |             )
 272 |         else:
 273 |             # Draw door handle
 274 |             r.drawCircle(CELL_PIXELS * 0.75, CELL_PIXELS * 0.5, 2)
 275 | 
 276 | class Key(WorldObj):
 277 |     def __init__(self, color='blue'):
 278 |         super(Key, self).__init__('key', color)
 279 | 
 280 |     def can_pickup(self):
 281 |         return True
 282 | 
 283 |     def render(self, r):
 284 |         self._set_color(r)
 285 | 
 286 |         # Vertical quad
 287 |         r.drawPolygon([
 288 |             (16, 10),
 289 |             (20, 10),
 290 |             (20, 28),
 291 |             (16, 28)
 292 |         ])
 293 | 
 294 |         # Teeth
 295 |         r.drawPolygon([
 296 |             (12, 19),
 297 |             (16, 19),
 298 |             (16, 21),
 299 |             (12, 21)
 300 |         ])
 301 |         r.drawPolygon([
 302 |             (12, 26),
 303 |             (16, 26),
 304 |             (16, 28),
 305 |             (12, 28)
 306 |         ])
 307 | 
 308 |         r.drawCircle(18, 9, 6)
 309 |         r.setLineColor(0, 0, 0)
 310 |         r.setColor(0, 0, 0)
 311 |         r.drawCircle(18, 9, 2)
 312 | 
 313 | class Ball(WorldObj):
 314 |     def __init__(self, color='blue'):
 315 |         super(Ball, self).__init__('ball', color)
 316 | 
 317 |     def can_pickup(self):
 318 |         return True
 319 | 
 320 |     def render(self, r):
 321 |         self._set_color(r)
 322 |         r.drawCircle(CELL_PIXELS * 0.5, CELL_PIXELS * 0.5, 10)
 323 | 
 324 | class Box(WorldObj):
 325 |     def __init__(self, color, contains=None):
 326 |         super(Box, self).__init__('box', color)
 327 |         self.contains = contains
 328 | 
 329 |     def can_pickup(self):
 330 |         return True
 331 | 
 332 |     def render(self, r):
 333 |         c = COLORS[self.color]
 334 |         r.setLineColor(c[0], c[1], c[2])
 335 |         r.setColor(0, 0, 0)
 336 |         r.setLineWidth(2)
 337 | 
 338 |         r.drawPolygon([
 339 |             (4            , CELL_PIXELS-4),
 340 |             (CELL_PIXELS-4, CELL_PIXELS-4),
 341 |             (CELL_PIXELS-4,             4),
 342 |             (4            ,             4)
 343 |         ])
 344 | 
 345 |         r.drawLine(
 346 |             4,
 347 |             CELL_PIXELS / 2,
 348 |             CELL_PIXELS - 4,
 349 |             CELL_PIXELS / 2
 350 |         )
 351 | 
 352 |         r.setLineWidth(1)
 353 | 
 354 |     def toggle(self, env, pos):
 355 |         # Replace the box by its contents
 356 |         env.grid.set(*pos, self.contains)
 357 |         return True
 358 | 
 359 | class Grid:
 360 |     """
 361 |     Represent a grid and operations on it
 362 |     """
 363 | 
 364 |     def __init__(self, width, height):
 365 |         assert width >= 3
 366 |         assert height >= 3
 367 | 
 368 |         self.width = width
 369 |         self.height = height
 370 | 
 371 |         self.grid = [None] * width * height
 372 | 
 373 |     def __contains__(self, key):
 374 |         if isinstance(key, WorldObj):
 375 |             for e in self.grid:
 376 |                 if e is key:
 377 |                     return True
 378 |         elif isinstance(key, tuple):
 379 |             for e in self.grid:
 380 |                 if e is None:
 381 |                     continue
 382 |                 if (e.color, e.type) == key:
 383 |                     return True
 384 |                 if key[0] is None and key[1] == e.type:
 385 |                     return True
 386 |         return False
 387 | 
 388 |     def __eq__(self, other):
 389 |         grid1 = self.encode()
 390 |         grid2 = other.encode()
 391 |         return np.array_equal(grid2, grid1)
 392 | 
 393 |     def __ne__(self, other):
 394 |         return not self == other
 395 | 
 396 |     def copy(self):
 397 |         from copy import deepcopy
 398 |         return deepcopy(self)
 399 | 
 400 |     def set(self, i, j, v):
 401 |         assert i >= 0 and i < self.width
 402 |         assert j >= 0 and j < self.height
 403 |         self.grid[j * self.width + i] = v
 404 | 
 405 |     def get(self, i, j):
 406 |         assert i >= 0 and i < self.width
 407 |         assert j >= 0 and j < self.height
 408 |         return self.grid[j * self.width + i]
 409 | 
 410 |     def horz_wall(self, x, y, length=None):
 411 |         if length is None:
 412 |             length = self.width - x
 413 |         for i in range(0, length):
 414 |             self.set(x + i, y, Wall())
 415 | 
 416 |     def vert_wall(self, x, y, length=None):
 417 |         if length is None:
 418 |             length = self.height - y
 419 |         for j in range(0, length):
 420 |             self.set(x, y + j, Wall())
 421 | 
 422 |     def wall_rect(self, x, y, w, h):
 423 |         self.horz_wall(x, y, w)
 424 |         self.horz_wall(x, y+h-1, w)
 425 |         self.vert_wall(x, y, h)
 426 |         self.vert_wall(x+w-1, y, h)
 427 | 
 428 |     def rotate_left(self):
 429 |         """
 430 |         Rotate the grid to the left (counter-clockwise)
 431 |         """
 432 | 
 433 |         grid = Grid(self.height, self.width)
 434 | 
 435 |         for i in range(self.width):
 436 |             for j in range(self.height):
 437 |                 v = self.get(i, j)
 438 |                 grid.set(j, grid.height - 1 - i, v)
 439 | 
 440 |         return grid
 441 | 
 442 |     def slice(self, topX, topY, width, height):
 443 |         """
 444 |         Get a subset of the grid
 445 |         """
 446 | 
 447 |         grid = Grid(width, height)
 448 | 
 449 |         for j in range(0, height):
 450 |             for i in range(0, width):
 451 |                 x = topX + i
 452 |                 y = topY + j
 453 | 
 454 |                 if x >= 0 and x < self.width and \
 455 |                    y >= 0 and y < self.height:
 456 |                     v = self.get(x, y)
 457 |                 else:
 458 |                     v = Wall()
 459 | 
 460 |                 grid.set(i, j, v)
 461 | 
 462 |         return grid
 463 | 
 464 |     def render(self, r, tile_size):
 465 |         """
 466 |         Render this grid at a given scale
 467 |         :param r: target renderer object
 468 |         :param tile_size: tile size in pixels
 469 |         """
 470 | 
 471 |         assert r.width == self.width * tile_size
 472 |         assert r.height == self.height * tile_size
 473 | 
 474 |         # Total grid size at native scale
 475 |         widthPx = self.width * CELL_PIXELS
 476 |         heightPx = self.height * CELL_PIXELS
 477 | 
 478 |         r.push()
 479 | 
 480 |         # Internally, we draw at the "large" full-grid resolution, but we
 481 |         # use the renderer to scale back to the desired size
 482 |         r.scale(tile_size / CELL_PIXELS, tile_size / CELL_PIXELS)
 483 | 
 484 |         # Draw the background of the in-world cells black
 485 |         r.fillRect(
 486 |             0,
 487 |             0,
 488 |             widthPx,
 489 |             heightPx,
 490 |             0, 0, 0
 491 |         )
 492 | 
 493 |         # Draw grid lines
 494 |         r.setLineColor(100, 100, 100)
 495 |         for rowIdx in range(0, self.height):
 496 |             y = CELL_PIXELS * rowIdx
 497 |             r.drawLine(0, y, widthPx, y)
 498 |         for colIdx in range(0, self.width):
 499 |             x = CELL_PIXELS * colIdx
 500 |             r.drawLine(x, 0, x, heightPx)
 501 | 
 502 |         # Render the grid
 503 |         for j in range(0, self.height):
 504 |             for i in range(0, self.width):
 505 |                 cell = self.get(i, j)
 506 |                 if cell == None:
 507 |                     continue
 508 |                 r.push()
 509 |                 r.translate(i * CELL_PIXELS, j * CELL_PIXELS)
 510 |                 cell.render(r)
 511 |                 r.pop()
 512 | 
 513 |         r.pop()
 514 | 
 515 |     def encode(self, vis_mask=None):
 516 |         """
 517 |         Produce a compact numpy encoding of the grid
 518 |         """
 519 | 
 520 |         if vis_mask is None:
 521 |             vis_mask = np.ones((self.width, self.height), dtype=bool)
 522 | 
 523 |         array = np.zeros((self.width, self.height, 3), dtype='uint8')
 524 |         for i in range(self.width):
 525 |             for j in range(self.height):
 526 |                 if vis_mask[i, j]:
 527 |                     v = self.get(i, j)
 528 | 
 529 |                     if v is None:
 530 |                         array[i, j, 0] = OBJECT_TO_IDX['empty']
 531 |                         array[i, j, 1] = 0
 532 |                         array[i, j, 2] = 0
 533 |                     else:
 534 |                         # State, 0: open, 1: closed, 2: locked
 535 |                         state = 0
 536 |                         if hasattr(v, 'is_open') and not v.is_open:
 537 |                             state = 1
 538 |                         if hasattr(v, 'is_locked') and v.is_locked:
 539 |                             state = 2
 540 | 
 541 |                         array[i, j, 0] = OBJECT_TO_IDX[v.type]
 542 |                         array[i, j, 1] = COLOR_TO_IDX[v.color]
 543 |                         array[i, j, 2] = state
 544 | 
 545 |         return array
 546 | 
 547 |     @staticmethod
 548 |     def decode(array):
 549 |         """
 550 |         Decode an array grid encoding back into a grid
 551 |         """
 552 | 
 553 |         width, height, channels = array.shape
 554 |         assert channels == 3
 555 | 
 556 |         grid = Grid(width, height)
 557 |         for i in range(width):
 558 |             for j in range(height):
 559 |                 typeIdx, colorIdx, state = array[i, j]
 560 | 
 561 |                 if typeIdx == OBJECT_TO_IDX['unseen'] or \
 562 |                         typeIdx == OBJECT_TO_IDX['empty']:
 563 |                     continue
 564 | 
 565 |                 objType = IDX_TO_OBJECT[typeIdx]
 566 |                 color = IDX_TO_COLOR[colorIdx]
 567 |                 # State, 0: open, 1: closed, 2: locked
 568 |                 is_open = state == 0
 569 |                 is_locked = state == 2
 570 | 
 571 |                 if objType == 'wall':
 572 |                     v = Wall(color)
 573 |                 elif objType == 'floor':
 574 |                     v = Floor(color)
 575 |                 elif objType == 'ball':
 576 |                     v = Ball(color)
 577 |                 elif objType == 'key':
 578 |                     v = Key(color)
 579 |                 elif objType == 'box':
 580 |                     v = Box(color)
 581 |                 elif objType == 'door':
 582 |                     v = Door(color, is_open, is_locked)
 583 |                 elif objType == 'goal':
 584 |                     v = Goal()
 585 |                 elif objType == 'lava':
 586 |                     v = Lava()
 587 |                 else:
 588 |                     assert False, "unknown obj type in decode '%s'" % objType
 589 | 
 590 |                 grid.set(i, j, v)
 591 | 
 592 |         return grid
 593 | 
 594 |     def process_vis(grid, agent_pos):
 595 |         mask = np.zeros(shape=(grid.width, grid.height), dtype=np.bool)
 596 | 
 597 |         mask[agent_pos[0], agent_pos[1]] = True
 598 | 
 599 |         for j in reversed(range(0, grid.height)):
 600 |             for i in range(0, grid.width-1):
 601 |                 if not mask[i, j]:
 602 |                     continue
 603 | 
 604 |                 cell = grid.get(i, j)
 605 |                 if cell and not cell.see_behind():
 606 |                     continue
 607 | 
 608 |                 mask[i+1, j] = True
 609 |                 if j > 0:
 610 |                     mask[i+1, j-1] = True
 611 |                     mask[i, j-1] = True
 612 | 
 613 |             for i in reversed(range(1, grid.width)):
 614 |                 if not mask[i, j]:
 615 |                     continue
 616 | 
 617 |                 cell = grid.get(i, j)
 618 |                 if cell and not cell.see_behind():
 619 |                     continue
 620 | 
 621 |                 mask[i-1, j] = True
 622 |                 if j > 0:
 623 |                     mask[i-1, j-1] = True
 624 |                     mask[i, j-1] = True
 625 | 
 626 |         for j in range(0, grid.height):
 627 |             for i in range(0, grid.width):
 628 |                 if not mask[i, j]:
 629 |                     grid.set(i, j, None)
 630 | 
 631 |         return mask
 632 | 
 633 | class MiniGridEnv(gym.Env):
 634 |     """
 635 |     2D grid world game environment
 636 |     """
 637 | 
 638 |     metadata = {
 639 |         'render.modes': ['human', 'rgb_array', 'pixmap'],
 640 |         'video.frames_per_second' : 10
 641 |     }
 642 | 
    # Enumeration of possible actions
    # Each member is an integer code; the values run contiguously from 0
    class Actions(IntEnum):
        # Turn left, turn right, move forward
        left = 0
        right = 1
        forward = 2

        # Pick up an object
        pickup = 3
        # Drop an object
        drop = 4
        # Toggle/activate an object
        toggle = 5

        # Done completing task
        done = 6
 659 | 
 660 |     def __init__(
 661 |         self,
 662 |         grid_size=None,
 663 |         width=None,
 664 |         height=None,
 665 |         max_steps=100,
 666 |         see_through_walls=False,
 667 |         seed=1337,
 668 |         agent_view_size=7
 669 |     ):
 670 |         # Can't set both grid_size and width/height
 671 |         if grid_size:
 672 |             assert width == None and height == None
 673 |             width = grid_size
 674 |             height = grid_size
 675 | 
 676 |         # Action enumeration for this environment
 677 |         self.actions = MiniGridEnv.Actions
 678 | 
 679 |         # Actions are discrete integer values
 680 |         self.action_space = spaces.Discrete(len(self.actions))
 681 | 
 682 |         # Number of cells (width and height) in the agent view
 683 |         self.agent_view_size = agent_view_size
 684 | 
 685 |         # Observations are dictionaries containing an
 686 |         # encoding of the grid and a textual 'mission' string
 687 |         self.observation_space = spaces.Box(
 688 |             low=0,
 689 |             high=255,
 690 |             shape=(self.agent_view_size, self.agent_view_size, 3),
 691 |             dtype='uint8'
 692 |         )
 693 |         self.observation_space = spaces.Dict({
 694 |             'image': self.observation_space
 695 |         })
 696 | 
 697 |         # Range of possible rewards
 698 |         self.reward_range = (0, 1)
 699 | 
 700 |         # Renderer object used to render the whole grid (full-scale)
 701 |         self.grid_render = None
 702 | 
 703 |         # Renderer used to render observations (small-scale agent view)
 704 |         self.obs_render = None
 705 | 
 706 |         # Environment configuration
 707 |         self.width = width
 708 |         self.height = height
 709 |         self.max_steps = max_steps
 710 |         self.see_through_walls = see_through_walls
 711 | 
 712 |         # Current position and direction of the agent
 713 |         self.agent_pos = None
 714 |         self.agent_dir = None
 715 | 
 716 |         # Initialize the RNG
 717 |         self.seed(seed=seed)
 718 | 
 719 |         # Initialize the state
 720 |         self.reset()
 721 | 
 722 |     def reset(self):
 723 |         # Current position and direction of the agent
 724 |         self.agent_pos = None
 725 |         self.agent_dir = None
 726 | 
 727 |         # Generate a new random grid at the start of each episode
 728 |         # To keep the same grid for each episode, call env.seed() with
 729 |         # the same seed before calling env.reset()
 730 |         self._gen_grid(self.width, self.height)
 731 | 
 732 |         # These fields should be defined by _gen_grid
 733 |         assert self.agent_pos is not None
 734 |         assert self.agent_dir is not None
 735 | 
 736 |         # Check that the agent doesn't overlap with an object
 737 |         start_cell = self.grid.get(*self.agent_pos)
 738 |         assert start_cell is None or start_cell.can_overlap()
 739 | 
 740 |         # Item picked up, being carried, initially nothing
 741 |         self.carrying = None
 742 | 
 743 |         # Step count since episode start
 744 |         self.step_count = 0
 745 | 
 746 |         # Return first observation
 747 |         obs = self.gen_obs()
 748 |         return obs
 749 | 
 750 |     def seed(self, seed=1337):
 751 |         # Seed the random number generator
 752 |         self.np_random, _ = seeding.np_random(seed)
 753 |         return [seed]
 754 | 
 755 |     @property
 756 |     def steps_remaining(self):
 757 |         return self.max_steps - self.step_count
 758 | 
 759 |     def __str__(self):
 760 |         """
 761 |         Produce a pretty string of the environment's grid along with the agent.
 762 |         A grid cell is represented by 2-character string, the first one for
 763 |         the object and the second one for the color.
 764 |         """
 765 | 
 766 |         # Map of object types to short string
 767 |         OBJECT_TO_STR = {
 768 |             'wall'          : 'W',
 769 |             'floor'         : 'F',
 770 |             'door'          : 'D',
 771 |             'key'           : 'K',
 772 |             'ball'          : 'A',
 773 |             'box'           : 'B',
 774 |             'goal'          : 'G',
 775 |             'lava'          : 'V',
 776 |         }
 777 | 
 778 |         # Short string for opened door
 779 |         OPENDED_DOOR_IDS = '_'
 780 | 
 781 |         # Map agent's direction to short string
 782 |         AGENT_DIR_TO_STR = {
 783 |             0: '>',
 784 |             1: 'V',
 785 |             2: '<',
 786 |             3: '^'
 787 |         }
 788 | 
 789 |         str = ''
 790 | 
 791 |         for j in range(self.grid.height):
 792 | 
 793 |             for i in range(self.grid.width):
 794 |                 if i == self.agent_pos[0] and j == self.agent_pos[1]:
 795 |                     str += 2 * AGENT_DIR_TO_STR[self.agent_dir]
 796 |                     continue
 797 | 
 798 |                 c = self.grid.get(i, j)
 799 | 
 800 |                 if c == None:
 801 |                     str += '  '
 802 |                     continue
 803 | 
 804 |                 if c.type == 'door':
 805 |                     if c.is_open:
 806 |                         str += '__'
 807 |                     elif c.is_locked:
 808 |                         str += 'L' + c.color[0].upper()
 809 |                     else:
 810 |                         str += 'D' + c.color[0].upper()
 811 |                     continue
 812 | 
 813 |                 str += OBJECT_TO_STR[c.type] + c.color[0].upper()
 814 | 
 815 |             if j < self.grid.height - 1:
 816 |                 str += '\n'
 817 | 
 818 |         return str
 819 | 
 820 |     def _gen_grid(self, width, height):
 821 |         assert False, "_gen_grid needs to be implemented by each environment"
 822 | 
 823 |     def _reward(self):
 824 |         """
 825 |         Compute the reward to be given upon success
 826 |         """
 827 | 
 828 |         return 1 - 0.9 * (self.step_count / self.max_steps)
 829 | 
 830 |     def _rand_int(self, low, high):
 831 |         """
 832 |         Generate random integer in [low,high[
 833 |         """
 834 | 
 835 |         return self.np_random.randint(low, high)
 836 | 
 837 |     def _rand_float(self, low, high):
 838 |         """
 839 |         Generate random float in [low,high[
 840 |         """
 841 | 
 842 |         return self.np_random.uniform(low, high)
 843 | 
 844 |     def _rand_bool(self):
 845 |         """
 846 |         Generate random boolean value
 847 |         """
 848 | 
 849 |         return (self.np_random.randint(0, 2) == 0)
 850 | 
 851 |     def _rand_elem(self, iterable):
 852 |         """
 853 |         Pick a random element in a list
 854 |         """
 855 | 
 856 |         lst = list(iterable)
 857 |         idx = self._rand_int(0, len(lst))
 858 |         return lst[idx]
 859 | 
 860 |     def _rand_subset(self, iterable, num_elems):
 861 |         """
 862 |         Sample a random subset of distinct elements of a list
 863 |         """
 864 | 
 865 |         lst = list(iterable)
 866 |         assert num_elems <= len(lst)
 867 | 
 868 |         out = []
 869 | 
 870 |         while len(out) < num_elems:
 871 |             elem = self._rand_elem(lst)
 872 |             lst.remove(elem)
 873 |             out.append(elem)
 874 | 
 875 |         return out
 876 | 
 877 |     def _rand_color(self):
 878 |         """
 879 |         Generate a random color name (string)
 880 |         """
 881 | 
 882 |         return self._rand_elem(COLOR_NAMES)
 883 | 
 884 |     def _rand_pos(self, xLow, xHigh, yLow, yHigh):
 885 |         """
 886 |         Generate a random (x,y) position tuple
 887 |         """
 888 | 
 889 |         return (
 890 |             self.np_random.randint(xLow, xHigh),
 891 |             self.np_random.randint(yLow, yHigh)
 892 |         )
 893 | 
 894 |     def place_obj(self,
 895 |         obj,
 896 |         top=None,
 897 |         size=None,
 898 |         reject_fn=None,
 899 |         max_tries=math.inf
 900 |     ):
 901 |         """
 902 |         Place an object at an empty position in the grid
 903 | 
 904 |         :param top: top-left position of the rectangle where to place
 905 |         :param size: size of the rectangle where to place
 906 |         :param reject_fn: function to filter out potential positions
 907 |         """
 908 | 
 909 |         if top is None:
 910 |             top = (0, 0)
 911 |         else:
 912 |             top = (max(top[0], 0), max(top[1], 0))
 913 | 
 914 |         if size is None:
 915 |             size = (self.grid.width, self.grid.height)
 916 | 
 917 |         num_tries = 0
 918 | 
 919 |         while True:
 920 |             # This is to handle with rare cases where rejection sampling
 921 |             # gets stuck in an infinite loop
 922 |             if num_tries > max_tries:
 923 |                 raise RecursionError('rejection sampling failed in place_obj')
 924 | 
 925 |             num_tries += 1
 926 | 
 927 |             pos = np.array((
 928 |                 self._rand_int(top[0], min(top[0] + size[0], self.grid.width)),
 929 |                 self._rand_int(top[1], min(top[1] + size[1], self.grid.height))
 930 |             ))
 931 | 
 932 |             # Don't place the object on top of another object
 933 |             if self.grid.get(*pos) != None:
 934 |                 continue
 935 | 
 936 |             # Don't place the object where the agent is
 937 |             if np.array_equal(pos, self.agent_pos):
 938 |                 continue
 939 | 
 940 |             # Check if there is a filtering criterion
 941 |             if reject_fn and reject_fn(self, pos):
 942 |                 continue
 943 | 
 944 |             break
 945 | 
 946 |         self.grid.set(*pos, obj)
 947 | 
 948 |         if obj is not None:
 949 |             obj.init_pos = pos
 950 |             obj.cur_pos = pos
 951 | 
 952 |         return pos
 953 | 
 954 |     def place_agent(
 955 |         self,
 956 |         top=None,
 957 |         size=None,
 958 |         rand_dir=True,
 959 |         max_tries=math.inf
 960 |     ):
 961 |         """
 962 |         Set the agent's starting point at an empty position in the grid
 963 |         """
 964 | 
 965 |         self.agent_pos = None
 966 |         pos = self.place_obj(None, top, size, max_tries=max_tries)
 967 |         self.agent_pos = pos
 968 | 
 969 |         if rand_dir:
 970 |             self.agent_dir = self._rand_int(0, 4)
 971 | 
 972 |         return pos
 973 | 
 974 |     @property
 975 |     def dir_vec(self):
 976 |         """
 977 |         Get the direction vector for the agent, pointing in the direction
 978 |         of forward movement.
 979 |         """
 980 | 
 981 |         assert self.agent_dir >= 0 and self.agent_dir < 4
 982 |         return DIR_TO_VEC[self.agent_dir]
 983 | 
 984 |     @property
 985 |     def right_vec(self):
 986 |         """
 987 |         Get the vector pointing to the right of the agent.
 988 |         """
 989 | 
 990 |         dx, dy = self.dir_vec
 991 |         return np.array((-dy, dx))
 992 | 
 993 |     @property
 994 |     def front_pos(self):
 995 |         """
 996 |         Get the position of the cell that is right in front of the agent
 997 |         """
 998 | 
 999 |         return self.agent_pos + self.dir_vec
1000 | 
1001 |     def get_view_coords(self, i, j):
1002 |         """
1003 |         Translate and rotate absolute grid coordinates (i, j) into the
1004 |         agent's partially observable view (sub-grid). Note that the resulting
1005 |         coordinates may be negative or outside of the agent's view size.
1006 |         """
1007 | 
1008 |         ax, ay = self.agent_pos
1009 |         dx, dy = self.dir_vec
1010 |         rx, ry = self.right_vec
1011 | 
1012 |         # Compute the absolute coordinates of the top-left view corner
1013 |         sz = self.agent_view_size
1014 |         hs = self.agent_view_size // 2
1015 |         tx = ax + (dx * (sz-1)) - (rx * hs)
1016 |         ty = ay + (dy * (sz-1)) - (ry * hs)
1017 | 
1018 |         lx = i - tx
1019 |         ly = j - ty
1020 | 
1021 |         # Project the coordinates of the object relative to the top-left
1022 |         # corner onto the agent's own coordinate system
1023 |         vx = (rx*lx + ry*ly)
1024 |         vy = -(dx*lx + dy*ly)
1025 | 
1026 |         return vx, vy
1027 | 
1028 |     def get_view_exts(self):
1029 |         """
1030 |         Get the extents of the square set of tiles visible to the agent
1031 |         Note: the bottom extent indices are not included in the set
1032 |         """
1033 | 
1034 |         # Facing right
1035 |         if self.agent_dir == 0:
1036 |             topX = self.agent_pos[0]
1037 |             topY = self.agent_pos[1] - self.agent_view_size // 2
1038 |         # Facing down
1039 |         elif self.agent_dir == 1:
1040 |             topX = self.agent_pos[0] - self.agent_view_size // 2
1041 |             topY = self.agent_pos[1]
1042 |         # Facing left
1043 |         elif self.agent_dir == 2:
1044 |             topX = self.agent_pos[0] - self.agent_view_size + 1
1045 |             topY = self.agent_pos[1] - self.agent_view_size // 2
1046 |         # Facing up
1047 |         elif self.agent_dir == 3:
1048 |             topX = self.agent_pos[0] - self.agent_view_size // 2
1049 |             topY = self.agent_pos[1] - self.agent_view_size + 1
1050 |         else:
1051 |             assert False, "invalid agent direction"
1052 | 
1053 |         botX = topX + self.agent_view_size
1054 |         botY = topY + self.agent_view_size
1055 | 
1056 |         return (topX, topY, botX, botY)
1057 | 
1058 |     def relative_coords(self, x, y):
1059 |         """
1060 |         Check if a grid position belongs to the agent's field of view, and returns the corresponding coordinates
1061 |         """
1062 | 
1063 |         vx, vy = self.get_view_coords(x, y)
1064 | 
1065 |         if vx < 0 or vy < 0 or vx >= self.agent_view_size or vy >= self.agent_view_size:
1066 |             return None
1067 | 
1068 |         return vx, vy
1069 | 
1070 |     def in_view(self, x, y):
1071 |         """
1072 |         check if a grid position is visible to the agent
1073 |         """
1074 | 
1075 |         return self.relative_coords(x, y) is not None
1076 | 
1077 |     def agent_sees(self, x, y):
1078 |         """
1079 |         Check if a non-empty grid position is visible to the agent
1080 |         """
1081 | 
1082 |         coordinates = self.relative_coords(x, y)
1083 |         if coordinates is None:
1084 |             return False
1085 |         vx, vy = coordinates
1086 | 
1087 |         obs = self.gen_obs()
1088 |         obs_grid = Grid.decode(obs['image'])
1089 |         obs_cell = obs_grid.get(vx, vy)
1090 |         world_cell = self.grid.get(x, y)
1091 | 
1092 |         return obs_cell is not None and obs_cell.type == world_cell.type
1093 | 
1094 |     def step(self, action):
1095 |         self.step_count += 1
1096 | 
1097 |         reward = 0
1098 |         done = False
1099 | 
1100 |         # Get the position in front of the agent
1101 |         fwd_pos = self.front_pos
1102 | 
1103 |         # Get the contents of the cell in front of the agent
1104 |         fwd_cell = self.grid.get(*fwd_pos)
1105 | 
1106 |         # Rotate left
1107 |         if action == self.actions.left:
1108 |             self.agent_dir -= 1
1109 |             if self.agent_dir < 0:
1110 |                 self.agent_dir += 4
1111 | 
1112 |         # Rotate right
1113 |         elif action == self.actions.right:
1114 |             self.agent_dir = (self.agent_dir + 1) % 4
1115 | 
1116 |         # Move forward
1117 |         elif action == self.actions.forward:
1118 |             if fwd_cell == None or fwd_cell.can_overlap():
1119 |                 self.agent_pos = fwd_pos
1120 |             if fwd_cell != None and fwd_cell.type == 'goal':
1121 |                 done = True
1122 |                 reward = self._reward()
1123 |             if fwd_cell != None and fwd_cell.type == 'lava':
1124 |                 done = True
1125 | 
1126 |         # Pick up an object
1127 |         elif action == self.actions.pickup:
1128 |             if fwd_cell and fwd_cell.can_pickup():
1129 |                 if self.carrying is None:
1130 |                     self.carrying = fwd_cell
1131 |                     self.carrying.cur_pos = np.array([-1, -1])
1132 |                     self.grid.set(*fwd_pos, None)
1133 | 
1134 |         # Drop an object
1135 |         elif action == self.actions.drop:
1136 |             if not fwd_cell and self.carrying:
1137 |                 self.grid.set(*fwd_pos, self.carrying)
1138 |                 self.carrying.cur_pos = fwd_pos
1139 |                 self.carrying = None
1140 | 
1141 |         # Toggle/activate an object
1142 |         elif action == self.actions.toggle:
1143 |             if fwd_cell:
1144 |                 fwd_cell.toggle(self, fwd_pos)
1145 | 
1146 |         # Done action (not used by default)
1147 |         elif action == self.actions.done:
1148 |             pass
1149 | 
1150 |         else:
1151 |             assert False, "unknown action"
1152 | 
1153 |         if self.step_count >= self.max_steps:
1154 |             done = True
1155 | 
1156 |         obs = self.gen_obs()
1157 | 
1158 |         return obs, reward, done, {}
1159 | 
1160 |     def gen_obs_grid(self):
1161 |         """
1162 |         Generate the sub-grid observed by the agent.
1163 |         This method also outputs a visibility mask telling us which grid
1164 |         cells the agent can actually see.
1165 |         """
1166 | 
1167 |         topX, topY, botX, botY = self.get_view_exts()
1168 | 
1169 |         grid = self.grid.slice(topX, topY, self.agent_view_size, self.agent_view_size)
1170 | 
1171 |         for i in range(self.agent_dir + 1):
1172 |             grid = grid.rotate_left()
1173 | 
1174 |         # Process occluders and visibility
1175 |         # Note that this incurs some performance cost
1176 |         if not self.see_through_walls:
1177 |             vis_mask = grid.process_vis(agent_pos=(self.agent_view_size // 2 , self.agent_view_size - 1))
1178 |         else:
1179 |             vis_mask = np.ones(shape=(grid.width, grid.height), dtype=np.bool)
1180 | 
1181 |         # Make it so the agent sees what it's carrying
1182 |         # We do this by placing the carried object at the agent's position
1183 |         # in the agent's partially observable view
1184 |         agent_pos = grid.width // 2, grid.height - 1
1185 |         if self.carrying:
1186 |             grid.set(*agent_pos, self.carrying)
1187 |         else:
1188 |             grid.set(*agent_pos, None)
1189 | 
1190 |         return grid, vis_mask
1191 | 
1192 |     def gen_obs(self):
1193 |         """
1194 |         Generate the agent's view (partially observable, low-resolution encoding)
1195 |         """
1196 | 
1197 |         grid, vis_mask = self.gen_obs_grid()
1198 | 
1199 |         # Encode the partially observable view into a numpy array
1200 |         image = grid.encode(vis_mask)
1201 | 
1202 |         assert hasattr(self, 'mission'), "environments must define a textual mission string"
1203 | 
1204 |         # Observations are dictionaries containing:
1205 |         # - an image (partially observable view of the environment)
1206 |         # - the agent's direction/orientation (acting as a compass)
1207 |         # - a textual mission string (instructions for the agent)
1208 |         obs = {
1209 |             'image': image,
1210 |             'direction': self.agent_dir,
1211 |             'mission': self.mission
1212 |         }
1213 | 
1214 |         return obs
1215 | 
1216 |     def get_obs_render(self, obs, tile_size=CELL_PIXELS//2, mode='pixmap'):
1217 |         """
1218 |         Render an agent observation for visualization
1219 |         """
1220 | 
1221 |         if self.obs_render == None:
1222 |             from gym_minigrid.rendering import Renderer
1223 |             self.obs_render = Renderer(
1224 |                 self.agent_view_size * tile_size,
1225 |                 self.agent_view_size * tile_size
1226 |             )
1227 | 
1228 |         r = self.obs_render
1229 | 
1230 |         r.beginFrame()
1231 | 
1232 |         grid = Grid.decode(obs)
1233 | 
1234 |         # Render the whole grid
1235 |         grid.render(r, tile_size)
1236 | 
1237 |         # Draw the agent
1238 |         ratio = tile_size / CELL_PIXELS
1239 |         r.push()
1240 |         r.scale(ratio, ratio)
1241 |         r.translate(
1242 |             CELL_PIXELS * (0.5 + self.agent_view_size // 2),
1243 |             CELL_PIXELS * (self.agent_view_size - 0.5)
1244 |         )
1245 |         r.rotate(3 * 90)
1246 |         r.setLineColor(255, 0, 0)
1247 |         r.setColor(255, 0, 0)
1248 |         r.drawPolygon([
1249 |             (-12, 10),
1250 |             ( 12,  0),
1251 |             (-12, -10)
1252 |         ])
1253 |         r.pop()
1254 | 
1255 |         r.endFrame()
1256 | 
1257 |         if mode == 'rgb_array':
1258 |             return r.getArray()
1259 |         elif mode == 'pixmap':
1260 |             return r.getPixmap()
1261 |         return r
1262 | 
    def render(self, mode='human', close=False, highlight=True, tile_size=CELL_PIXELS):
        """
        Render the whole-grid human view

        :param mode: 'rgb_array' returns r.getArray(), 'pixmap' returns
                     r.getPixmap(); any other value (including 'human')
                     returns the renderer. 'human' is also forwarded as a
                     True flag to the Renderer constructor
        :param close: when True, close the existing renderer (if any) and
                      return None without drawing
        :param highlight: when True, overlay the cells the agent can see
        :param tile_size: size in pixels of one rendered tile
        """

        if close:
            if self.grid_render:
                self.grid_render.close()
            return

        # (Re)create the renderer when there is none, when it has no window,
        # or when its width does not match the requested tile size
        if self.grid_render is None or self.grid_render.window is None or (self.grid_render.width != self.width * tile_size):
            from gym_minigrid.rendering import Renderer
            self.grid_render = Renderer(
                self.width * tile_size,
                self.height * tile_size,
                True if mode == 'human' else False
            )

        r = self.grid_render

        # Show the mission text in the window, when one exists
        if r.window:
            r.window.setText(self.mission)

        r.beginFrame()

        # Render the whole grid
        self.grid.render(r, tile_size)

        # Draw the agent
        # The triangle is specified in CELL_PIXELS units, so scale the
        # drawing transform to the requested tile size first
        ratio = tile_size / CELL_PIXELS
        r.push()
        r.scale(ratio, ratio)
        r.translate(
            CELL_PIXELS * (self.agent_pos[0] + 0.5),
            CELL_PIXELS * (self.agent_pos[1] + 0.5)
        )
        r.rotate(self.agent_dir * 90)
        r.setLineColor(255, 0, 0)
        r.setColor(255, 0, 0)
        r.drawPolygon([
            (-12, 10),
            ( 12,  0),
            (-12, -10)
        ])
        r.pop()

        # Compute which cells are visible to the agent
        _, vis_mask = self.gen_obs_grid()

        # Compute the absolute coordinates of the top-left corner
        # of the agent's view area (top-left in the agent's own frame:
        # fully forward and half a view to its left)
        f_vec = self.dir_vec
        r_vec = self.right_vec
        top_left = self.agent_pos + f_vec * (self.agent_view_size-1) - r_vec * (self.agent_view_size // 2)

        # For each cell in the visibility mask
        if highlight:
            for vis_j in range(0, self.agent_view_size):
                for vis_i in range(0, self.agent_view_size):
                    # If this cell is not visible, don't highlight it
                    if not vis_mask[vis_i, vis_j]:
                        continue

                    # Compute the world coordinates of this cell
                    abs_i, abs_j = top_left - (f_vec * vis_j) + (r_vec * vis_i)

                    # Highlight the cell
                    # (last four arguments: presumably an RGBA colour,
                    # i.e. translucent white - confirm against Renderer)
                    r.fillRect(
                        abs_i * tile_size,
                        abs_j * tile_size,
                        tile_size,
                        tile_size,
                        255, 255, 255, 75
                    )

        r.endFrame()

        if mode == 'rgb_array':
            return r.getArray()
        elif mode == 'pixmap':
            return r.getPixmap()
        return r
1345 | 


--------------------------------------------------------------------------------