├── gym_hanoi ├── envs │ ├── __init__.py │ ├── .DS_Store │ └── hanoi_env.py ├── .DS_Store ├── __init__.py └── tests │ └── test.py ├── images ├── env_gen.png ├── env_change.png └── hanoi_problem.png ├── .gitignore ├── setup.py ├── README.md └── tryout_env.ipynb /gym_hanoi/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_hanoi.envs.hanoi_env import HanoiEnv 2 | -------------------------------------------------------------------------------- /gym_hanoi/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobertTLange/gym-hanoi/HEAD/gym_hanoi/.DS_Store -------------------------------------------------------------------------------- /images/env_gen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobertTLange/gym-hanoi/HEAD/images/env_gen.png -------------------------------------------------------------------------------- /images/env_change.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobertTLange/gym-hanoi/HEAD/images/env_change.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg 2 | *.ipynb_checkpoints 3 | *.egg-info 4 | *.pyc 5 | *~ 6 | .DS_Store 7 | __pycache__ 8 | -------------------------------------------------------------------------------- /gym_hanoi/envs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobertTLange/gym-hanoi/HEAD/gym_hanoi/envs/.DS_Store -------------------------------------------------------------------------------- /images/hanoi_problem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobertTLange/gym-hanoi/HEAD/images/hanoi_problem.png -------------------------------------------------------------------------------- /gym_hanoi/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from gym.envs.registration import register 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | register( 7 | id='Hanoi-v0', 8 | entry_point='gym_hanoi.envs:HanoiEnv', 9 | ) 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setup(name='gym_hanoi', 7 | version='0.0.2', 8 | author='Robert Tjrako Lange', 9 | author_email='robert.t.lange@web.de', 10 | license='MIT', 11 | description="An OpenAI Gym Environment for the Towers of Hanoi Problem.", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/RobertTLange/gym-hanoi", 15 | install_requires=['numpy', 'gym'] 16 | ) 17 | -------------------------------------------------------------------------------- /gym_hanoi/tests/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | import gym 4 | import gym_hanoi 5 | 6 | 7 | class Environment(unittest.TestCase): 8 | 9 | def test_hanoi_env_make(self): 10 | gym.make("Hanoi-v0") 11 | 12 | def test_hanoi_env_reset(self): 13 | env = gym.make("Hanoi-v0") 14 | env.reset() 15 | 16 | def test_hanoi_env_step(self): 17 | env = gym.make("Hanoi-v0") 18 | env.reset() 19 | state, reward, done, info = env.step(0) 20 | self.assertEqual(len(state), 4) 21 | self.assertEqual(env.env_noise, 0) 22 | 23 | def test_hanoi_env_make_noise(self): 24 | env = gym.make("Hanoi-v0") 25 | env.set_env_parameters(env_noise=0.5) 26 | self.assertEqual(env.env_noise, 0.5) 27 | 28 | def test_hanoi_env_make_disks(self): 29 | env = gym.make("Hanoi-v0") 30 | env.set_env_parameters(num_disks=7) 31 | env.reset() 32 | state, reward, done, info = env.step(0) 33 | self.assertEqual(len(7), 7) 34 | self.assertEqual(env.env_noise, 0) 35 | 36 | def test_hanoi_env_make_noise_and_disks(self): 37 | env = gym.make("Hanoi-v0") 38 | env.set_env_parameters(env_noise=0.3, num_disks=7) 39 | env.reset() 40 | state, reward, done, info = env.step(0) 41 | self.assertEqual(len(7), 7) 42 | self.assertEqual(env.env_noise, 0.3) 43 | 44 | 45 | if __name__ == '__main__': 46 | unittest.main() 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gym-hanoi 2 | ## A Towers of Hanoi environment in OpenAI Gym Style 3 | ## Author: Robert Tjarko Lage 4 | ## Date: 09/2018 5 | 6 | This repository extends the OpenAI gym with the classical Towers of Hanoi environment. 7 | 8 | * Game Description: Environment consists of three pegs and a number of disks (N) of different sizes which can slide onto the pegs. The puzzle starts with all disks stacked on the first peg in ascending order, with the largest at the bottom and the smallest on top. The objective of the game is to move all the disks to the third peg. The only legal moves are those which take the top-most disk from one peg to another, with the restriction that a disk may never be placed upon a smaller disk. 9 | 10 | * States: Tuple of len N (number of discs) - elements: pole indices in [0,1,2]. 11 | 12 | * Actions: Integer between 0 and 5 (see descriptions below). 13 | 14 | * Episode terminates (successfully) if s_{t+1} = (2)_{i=1}^N. 15 | 16 | * Below you can find the optimal policy for N=3 (and states indexed from 1 to 3): 17 | 18 | ![Alt text](images/hanoi_problem.png) 19 | 20 | 21 | # Action Space 22 | * (0,1) - top disk of pole 0 to top of pole 1 - "a". 23 | * (0,2) - top disk of pole 0 to top of pole 2 - "b". 24 | * (1,0) - top disk of pole 1 to top of pole 0 - "c". 25 | * (1,2) - top disk of pole 1 to top of pole 2 - "d". 26 | * (2,0) - top disk of pole 2 to top of pole 0 - "e". 27 | * (2,1) - top disk of pole 2 to top of pole 1 - "f". 28 | 29 | # State Space 30 | 31 | Definition of states (e.g. N=3): 32 | * (0,0,0) - Initial state where all 3 three disks are stacked on pole 0. 33 | * (2,2,2) - Final state where all 3 three disks are stacked on pole 2. 34 | * (0,1,2) - Smallest disk on pole 0, middle disk on pole 1, largest on pole 2. 35 | 36 | # Installation 37 | 38 | * Clone the repository and install the package. 39 | ``` 40 | git clone https://github.com/RobertTLange/gym-hanoi 41 | cd gym_hanoi 42 | pip install -e . (if you use Python 2.) 43 | python setupy.py install (if you use Python 3.) 44 | ``` 45 | 46 | * Import the package and create an environment (default number of disks is 4 and transitions are deterministic). 47 | * **reset** - Episode starts in N-dimensional tuple of zeros (starting position with all disks on first pole) 48 | * **step** - Returns new state, transition reward, episode status and information about transition (Did environmental noise currupt transition?, Was the action valid?) 49 | 50 | ![Alt text](images/env_gen.png) 51 | 52 | * You can set the number of desired disks and the transition failure rate in the following way: 53 | ![Alt text](images/env_change.png) 54 | 55 | 56 | # Notes 57 | * Environment is especially suited for prototyping solutions to long-term credit assignment problems, sparse rewards and curriculum learning. 58 | * Following format guide in https://github.com/openai/gym/tree/master/gym/envs#how-to-create-new-environments-for-gym. 59 | -------------------------------------------------------------------------------- /tryout_env.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Obtaining file:///Users/rtl/Dropbox/PHD_ECN/PROJECTS/ActionGrammars/gym-hanoi\n", 13 | "Requirement already satisfied: numpy in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/numpy-1.16.0rc2-py3.6-macosx-10.7-x86_64.egg (from gym-hanoi==0.0.2) (1.16.0rc2)\n", 14 | "Requirement already satisfied: gym in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/gym-0.10.9-py3.6.egg (from gym-hanoi==0.0.2) (0.10.9)\n", 15 | "Requirement already satisfied: scipy in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/scipy-1.2.0-py3.6-macosx-10.7-x86_64.egg (from gym->gym-hanoi==0.0.2) (1.2.0)\n", 16 | "Requirement already satisfied: requests>=2.0 in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/requests-2.21.0-py3.6.egg (from gym->gym-hanoi==0.0.2) (2.21.0)\n", 17 | "Requirement already satisfied: six in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages (from gym->gym-hanoi==0.0.2) (1.12.0)\n", 18 | "Requirement already satisfied: pyglet>=1.2.0 in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/pyglet-1.4.0b1-py3.6.egg (from gym->gym-hanoi==0.0.2) (1.4.0b1)\n", 19 | "Requirement already satisfied: certifi>=2017.4.17 in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages (from requests>=2.0->gym->gym-hanoi==0.0.2) (2018.1.18)\n", 20 | "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/chardet-3.0.4-py3.6.egg (from requests>=2.0->gym->gym-hanoi==0.0.2) (3.0.4)\n", 21 | "Requirement already satisfied: idna<2.9,>=2.5 in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/idna-2.8-py3.6.egg (from requests>=2.0->gym->gym-hanoi==0.0.2) (2.8)\n", 22 | "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/urllib3-1.24.1-py3.6.egg (from requests>=2.0->gym->gym-hanoi==0.0.2) (1.24.1)\n", 23 | "Requirement already satisfied: future in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/future-0.17.1-py3.6.egg (from pyglet>=1.2.0->gym->gym-hanoi==0.0.2) (0.17.1)\n", 24 | "Installing collected packages: gym-hanoi\n", 25 | " Found existing installation: gym-hanoi 0.0.2\n", 26 | " Uninstalling gym-hanoi-0.0.2:\n", 27 | " Successfully uninstalled gym-hanoi-0.0.2\n", 28 | " Running setup.py develop for gym-hanoi\n", 29 | "Successfully installed gym-hanoi\n" 30 | ] 31 | } 32 | ], 33 | "source": [ 34 | "!pip install -e ." 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# Run unit tests\n", 44 | "# !python gym_swarm/tests/test.py" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 4, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "from IPython.display import set_matplotlib_formats\n", 54 | "set_matplotlib_formats('retina')\n", 55 | "\n", 56 | "# Import the environment\n", 57 | "import gym\n", 58 | "import gym_hanoi\n", 59 | "\n", 60 | "# Import base modules\n", 61 | "import time\n", 62 | "import numpy as np\n", 63 | "import matplotlib.pyplot as plt" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 5, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "name": "stdout", 73 | "output_type": "stream", 74 | "text": [ 75 | "Hanoi Environment Parameters have been set to:\n", 76 | "\t Number of Disks: 6\n", 77 | "\t Transition Failure Probability: 0.1\n", 78 | "State Transition:\n", 79 | "--> Disk 1 moves from pole 0 to 1\n", 80 | "--> Disk 2 moves from pole 0 to 0\n", 81 | "--> Disk 3 moves from pole 0 to 0\n", 82 | "--> Disk 4 moves from pole 0 to 0\n", 83 | "--> Disk 5 moves from pole 0 to 0\n", 84 | "--> Disk 6 moves from pole 0 to 0\n" 85 | ] 86 | }, 87 | { 88 | "name": "stderr", 89 | "output_type": "stream", 90 | "text": [ 91 | "/Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/gym-0.10.9-py3.6.egg/gym/envs/registration.py:14: PkgResourcesDeprecationWarning: Parameters to load are deprecated. Call .resolve and .require separately.\n", 92 | " result = entry_point.load(False)\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "# Make the environment and change the base params\n", 98 | "num_disks = 6\n", 99 | "env_noise = 0.1\n", 100 | "\n", 101 | "env = gym.make(\"Hanoi-v0\")\n", 102 | "env.set_env_parameters(num_disks, env_noise, verbose=True)\n", 103 | "state = env.reset()\n", 104 | "next_state, reward, done, _ = env.step(0)\n", 105 | "\n", 106 | "print(\"State Transition:\")\n", 107 | "for i in range(num_disks):\n", 108 | " print(\"--> Disk {} moves from pole {} to {}\".format(i+1, state[i], next_state[i]))" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 7, 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "name": "stdout", 118 | "output_type": "stream", 119 | "text": [ 120 | "CPU times: user 11.3 ms, sys: 262 µs, total: 11.6 ms\n", 121 | "Wall time: 11.4 ms\n", 122 | "CPU times: user 201 ms, sys: 2.59 ms, total: 203 ms\n", 123 | "Wall time: 203 ms\n", 124 | "CPU times: user 5.72 s, sys: 20.5 ms, total: 5.74 s\n", 125 | "Wall time: 5.75 s\n", 126 | "CPU times: user 3min 50s, sys: 1.06 s, total: 3min 51s\n", 127 | "Wall time: 3min 51s\n" 128 | ] 129 | } 130 | ], 131 | "source": [ 132 | "# Check how much time it takes to compute the movability map\n", 133 | "for num_disks in range(3, 7):\n", 134 | " env = gym.make(\"Hanoi-v0\")\n", 135 | " env.set_env_parameters(num_disks, verbose=False)\n", 136 | " state = env.reset()\n", 137 | " %time env.get_movability_map()" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [] 146 | } 147 | ], 148 | "metadata": { 149 | "kernelspec": { 150 | "display_name": "Python3 (AG)", 151 | "language": "python", 152 | "name": "ag" 153 | }, 154 | "language_info": { 155 | "codemirror_mode": { 156 | "name": "ipython", 157 | "version": 3 158 | }, 159 | "file_extension": ".py", 160 | "mimetype": "text/x-python", 161 | "name": "python", 162 | "nbconvert_exporter": "python", 163 | "pygments_lexer": "ipython3", 164 | "version": "3.6.6" 165 | } 166 | }, 167 | "nbformat": 4, 168 | "nbformat_minor": 2 169 | } 170 | -------------------------------------------------------------------------------- /gym_hanoi/envs/hanoi_env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import error, spaces, utils 3 | from gym.utils import seeding 4 | 5 | import random 6 | import itertools 7 | import numpy as np 8 | 9 | 10 | class HanoiEnv(gym.Env): 11 | metadata = {'render.modes': ['human']} 12 | 13 | def __init__(self): 14 | self.num_disks = 4 15 | self.env_noise = 0 16 | self.action_space = spaces.Discrete(6) 17 | self.observation_space = spaces.Tuple(self.num_disks*(spaces.Discrete(3),)) 18 | 19 | self.current_state = None 20 | self.goal_state = self.num_disks*(2,) 21 | 22 | self.done = None 23 | self.ACTION_LOOKUP = {0 : "(0,1) - top disk of pole 0 to top of pole 1 ", 24 | 1 : "(0,2) - top disk of pole 0 to top of pole 2 ", 25 | 2 : "(1,0) - top disk of pole 1 to top of pole 0", 26 | 3 : "(1,2) - top disk of pole 1 to top of pole 2", 27 | 4 : "(2,0) - top disk of pole 2 to top of pole 0", 28 | 5 : "(2,1) - top disk of pole 2 to top of pole 1"} 29 | 30 | def step(self, action): 31 | """ 32 | * Inputs: 33 | - action: integer from 0 to 5 (see ACTION_LOOKUP) 34 | * Outputs: 35 | - current_state: state after transition 36 | - reward: reward from transition 37 | - done: episode state 38 | - info: dict of booleans (noisy?/invalid action?) 39 | 0. Check if transition is noisy or not 40 | 1. Transform action (0 to 5 integer) to tuple move - see Lookup 41 | 2. Check if move is allowed 42 | 3. If it is change corresponding entry | If not return same state 43 | 4. Check if episode completed and return 44 | """ 45 | if self.done: 46 | raise RuntimeError("Episode has finished. Call env.reset() to start a new episode.") 47 | 48 | info = {"transition_failure": False, 49 | "invalid_action": False} 50 | 51 | if self.env_noise > 0: 52 | r_num = random.random() 53 | if r_num <= self.env_noise: 54 | action = random.randint(0, self.action_space.n-1) 55 | info["transition_failure"] = True 56 | 57 | move = action_to_move[action] 58 | 59 | if self.move_allowed(move): 60 | disk_to_move = min(self.disks_on_peg(move[0])) 61 | moved_state = list(self.current_state) 62 | moved_state[disk_to_move] = move[1] 63 | self.current_state = tuple(moved_state) 64 | else: 65 | info["invalid_action"] = True 66 | 67 | if self.current_state == self.goal_state: 68 | reward = 100 69 | self.done = True 70 | elif info["invalid_action"] == True: 71 | reward = -1 72 | else: 73 | reward = 0 74 | 75 | return self.current_state, reward, self.done, info 76 | 77 | def disks_on_peg(self, peg): 78 | """ 79 | * Inputs: 80 | - peg: pole to check how many/which disks are in it 81 | * Outputs: 82 | - list of disk numbers that are allocated on pole 83 | """ 84 | return [disk for disk in range(self.num_disks) if self.current_state[disk] == peg] 85 | 86 | def move_allowed(self, move): 87 | """ 88 | * Inputs: 89 | - move: tuple of state transition (see ACTION_LOOKUP) 90 | * Outputs: 91 | - boolean indicating whether action is allowed from state! 92 | move[0] - peg from which we want to move disc 93 | move[1] - peg we want to move disc to 94 | Allowed if: 95 | * discs_to is empty (no disc of peg) set to true 96 | * Smallest disc on target pole larger than smallest on prev 97 | """ 98 | disks_from = self.disks_on_peg(move[0]) 99 | disks_to = self.disks_on_peg(move[1]) 100 | 101 | if disks_from: 102 | return (min(disks_to) > min(disks_from)) if disks_to else True 103 | else: 104 | return False 105 | 106 | def reset(self): 107 | self.current_state = self.num_disks * (0,) 108 | self.done = False 109 | return self.current_state 110 | 111 | def render(self, mode='human', close=False): 112 | return 113 | 114 | def set_env_parameters(self, num_disks=4, env_noise=0, verbose=True): 115 | self.num_disks = num_disks 116 | self.env_noise = env_noise 117 | self.observation_space = spaces.Tuple(self.num_disks*(spaces.Discrete(3),)) 118 | self.goal_state = self.num_disks*(2,) 119 | 120 | if verbose: 121 | print("Hanoi Environment Parameters have been set to:") 122 | print("\t Number of Disks: {}".format(self.num_disks)) 123 | print("\t Transition Failure Probability: {}".format(self.env_noise)) 124 | 125 | def get_movability_map(self, fill=False): 126 | # Initialize movability map 127 | mov_map = np.zeros(self.num_disks*(3, ) + (6,)) 128 | 129 | if fill: 130 | # Get list of all states as tuples 131 | id_list = self.num_disks*[0] + self.num_disks*[1] + self.num_disks*[2] 132 | states = list(itertools.permutations(id_list, self.num_disks)) 133 | 134 | for state in states: 135 | for action in range(6): 136 | move = action_to_move[action] 137 | disks_from = [] 138 | disks_to = [] 139 | for d in range(self.num_disks): 140 | if state[d] == move[0]: disks_from.append(d) 141 | elif state[d] == move[1]: disks_to.append(d) 142 | 143 | if disks_from: valid = (min(disks_to) > min(disks_from)) if disks_to else True 144 | else: valid = False 145 | 146 | if not valid: mov_map[state][action] = -np.inf 147 | 148 | move_from = [m[0] for m in action_to_move] 149 | move_to = [m[1] for m in action_to_move] 150 | 151 | # # Try to get rid of action loop - vectorize... 152 | # for state in states: 153 | # s = np.array(state) 154 | # disks_from = [] 155 | # disks_to = [] 156 | # 157 | # for d in range(self.num_disks): 158 | # a_from = [a for a, v in enumerate(move_from) if v == s[d]] 159 | # a_to = [a for a, v in enumerate(move_to) if v == s[d]] 160 | # 161 | # if disks_from: 162 | # valid = (min(disks_to) > min(disks_from)) if disks_to else True 163 | # else: 164 | # valid = False 165 | # 166 | # if not valid: 167 | # mov_map[state][action] = -np.inf 168 | return mov_map 169 | 170 | 171 | action_to_move = [(0, 1), (0, 2), (1, 0), 172 | (1, 2), (2, 0), (2, 1)] 173 | 174 | # action_to_move = {0: (0, 1), 1: (0, 2), 2: (1, 0), 175 | # 3: (1, 2), 4: (2, 0), 5: (2, 1)} 176 | --------------------------------------------------------------------------------