├── gym_hanoi
    ├── envs
    │   ├── __init__.py
    │   ├── .DS_Store
    │   └── hanoi_env.py
    ├── .DS_Store
    ├── __init__.py
    └── tests
    │   └── test.py
├── images
    ├── env_gen.png
    ├── env_change.png
    └── hanoi_problem.png
├── .gitignore
├── setup.py
├── README.md
└── tryout_env.ipynb


/gym_hanoi/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from gym_hanoi.envs.hanoi_env import HanoiEnv
2 | 


--------------------------------------------------------------------------------
/gym_hanoi/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RobertTLange/gym-hanoi/HEAD/gym_hanoi/.DS_Store


--------------------------------------------------------------------------------
/images/env_gen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RobertTLange/gym-hanoi/HEAD/images/env_gen.png


--------------------------------------------------------------------------------
/images/env_change.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RobertTLange/gym-hanoi/HEAD/images/env_change.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg
2 | *.ipynb_checkpoints
3 | *.egg-info
4 | *.pyc
5 | *~
6 | .DS_Store
7 | __pycache__
8 | 


--------------------------------------------------------------------------------
/gym_hanoi/envs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RobertTLange/gym-hanoi/HEAD/gym_hanoi/envs/.DS_Store


--------------------------------------------------------------------------------
/images/hanoi_problem.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RobertTLange/gym-hanoi/HEAD/images/hanoi_problem.png


--------------------------------------------------------------------------------
/gym_hanoi/__init__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from gym.envs.registration import register
 3 | 
 4 | logger = logging.getLogger(__name__)
 5 | 
 6 | register(
 7 |     id='Hanoi-v0',
 8 |     entry_point='gym_hanoi.envs:HanoiEnv',
 9 | )
10 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | with open("README.md", "r") as fh:
 4 |     long_description = fh.read()
 5 | 
 6 | setup(name='gym_hanoi',
 7 |       version='0.0.2',
 8 |       author='Robert Tjrako Lange',
 9 |       author_email='robert.t.lange@web.de',
10 |       license='MIT',
11 |       description="An OpenAI Gym Environment for the Towers of Hanoi Problem.",
12 |       long_description=long_description,
13 |       long_description_content_type="text/markdown",
14 |       url="https://github.com/RobertTLange/gym-hanoi",
15 |       install_requires=['numpy', 'gym']
16 |       )
17 | 


--------------------------------------------------------------------------------
/gym_hanoi/tests/test.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import unittest
 3 | import gym
 4 | import gym_hanoi
 5 | 
 6 | 
 7 | class Environment(unittest.TestCase):
 8 | 
 9 |     def test_hanoi_env_make(self):
10 |         gym.make("Hanoi-v0")
11 | 
12 |     def test_hanoi_env_reset(self):
13 |         env = gym.make("Hanoi-v0")
14 |         env.reset()
15 | 
16 |     def test_hanoi_env_step(self):
17 |         env = gym.make("Hanoi-v0")
18 |         env.reset()
19 |         state, reward, done, info = env.step(0)
20 |         self.assertEqual(len(state), 4)
21 |         self.assertEqual(env.env_noise, 0)
22 | 
23 |     def test_hanoi_env_make_noise(self):
24 |         env = gym.make("Hanoi-v0")
25 |         env.set_env_parameters(env_noise=0.5)
26 |         self.assertEqual(env.env_noise, 0.5)
27 | 
28 |     def test_hanoi_env_make_disks(self):
29 |         env = gym.make("Hanoi-v0")
30 |         env.set_env_parameters(num_disks=7)
31 |         env.reset()
32 |         state, reward, done, info = env.step(0)
33 |         self.assertEqual(len(7), 7)
34 |         self.assertEqual(env.env_noise, 0)
35 | 
36 |     def test_hanoi_env_make_noise_and_disks(self):
37 |         env = gym.make("Hanoi-v0")
38 |         env.set_env_parameters(env_noise=0.3, num_disks=7)
39 |         env.reset()
40 |         state, reward, done, info = env.step(0)
41 |         self.assertEqual(len(7), 7)
42 |         self.assertEqual(env.env_noise, 0.3)
43 | 
44 | 
45 | if __name__ == '__main__':
46 |     unittest.main()
47 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # gym-hanoi
 2 | ## A Towers of Hanoi environment in OpenAI Gym Style
 3 | ## Author: Robert Tjarko Lange
 4 | ## Date: 09/2018
 5 | 
 6 | This repository extends the OpenAI gym with the classical Towers of Hanoi environment.
 7 | 
 8 | * Game Description: Environment consists of three pegs and a number of disks (N) of different sizes which can slide onto the pegs. The puzzle starts with all disks stacked on the first peg in ascending order, with the largest at the bottom and the smallest on top. The objective of the game is to move all the disks to the third peg. The only legal moves are those which take the top-most disk from one peg to another, with the restriction that a disk may never be placed upon a smaller disk.
 9 | 
10 | * States: Tuple of length N (number of disks) - elements: pole indices in [0,1,2].
11 | 
12 | * Actions: Integer between 0 and 5 (see descriptions below).
13 | 
14 | * Episode terminates (successfully) if s_{t+1} = (2)_{i=1}^N.
15 | 
16 | * Below you can find the optimal policy for N=3 (and states indexed from 1 to 3):
17 | 
18 | ![Alt text](images/hanoi_problem.png)
19 | 
20 | 
21 | # Action Space
22 | * (0,1) - top disk of pole 0 to top of pole 1 - "a".
23 | * (0,2) - top disk of pole 0 to top of pole 2 - "b".
24 | * (1,0) - top disk of pole 1 to top of pole 0 - "c".
25 | * (1,2) - top disk of pole 1 to top of pole 2 - "d".
26 | * (2,0) - top disk of pole 2 to top of pole 0 - "e".
27 | * (2,1) - top disk of pole 2 to top of pole 1 - "f".
28 | 
29 | # State Space
30 | 
31 | Definition of states (e.g. N=3):
32 | * (0,0,0) - Initial state where all three disks are stacked on pole 0.
33 | * (2,2,2) - Final state where all three disks are stacked on pole 2.
34 | * (0,1,2) - Smallest disk on pole 0, middle disk on pole 1, largest on pole 2.
35 | 
36 | # Installation
37 | 
38 | * Clone the repository and install the package.
39 | ```
40 | git clone https://github.com/RobertTLange/gym-hanoi
41 | cd gym-hanoi
42 | pip install -e .  (if you use Python 2.)
43 | python setup.py install  (if you use Python 3.)
44 | ```
45 | 
46 | * Import the package and create an environment (default number of disks is 4 and transitions are deterministic).
47 | * **reset** - Episode starts in N-dimensional tuple of zeros (starting position with all disks on first pole)
48 | * **step** - Returns new state, transition reward, episode status and information about transition (Did environmental noise corrupt transition?, Was the action valid?)
49 | 
50 | ![Alt text](images/env_gen.png)
51 | 
52 | * You can set the number of desired disks and the transition failure rate in the following way:
53 | ![Alt text](images/env_change.png)
54 | 
55 | 
56 | # Notes
57 | * Environment is especially suited for prototyping solutions to long-term credit assignment problems, sparse rewards and curriculum learning.
58 | * Following format guide in https://github.com/openai/gym/tree/master/gym/envs#how-to-create-new-environments-for-gym.
59 | 


--------------------------------------------------------------------------------
/tryout_env.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "Obtaining file:///Users/rtl/Dropbox/PHD_ECN/PROJECTS/ActionGrammars/gym-hanoi\n",
 13 |       "Requirement already satisfied: numpy in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/numpy-1.16.0rc2-py3.6-macosx-10.7-x86_64.egg (from gym-hanoi==0.0.2) (1.16.0rc2)\n",
 14 |       "Requirement already satisfied: gym in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/gym-0.10.9-py3.6.egg (from gym-hanoi==0.0.2) (0.10.9)\n",
 15 |       "Requirement already satisfied: scipy in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/scipy-1.2.0-py3.6-macosx-10.7-x86_64.egg (from gym->gym-hanoi==0.0.2) (1.2.0)\n",
 16 |       "Requirement already satisfied: requests>=2.0 in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/requests-2.21.0-py3.6.egg (from gym->gym-hanoi==0.0.2) (2.21.0)\n",
 17 |       "Requirement already satisfied: six in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages (from gym->gym-hanoi==0.0.2) (1.12.0)\n",
 18 |       "Requirement already satisfied: pyglet>=1.2.0 in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/pyglet-1.4.0b1-py3.6.egg (from gym->gym-hanoi==0.0.2) (1.4.0b1)\n",
 19 |       "Requirement already satisfied: certifi>=2017.4.17 in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages (from requests>=2.0->gym->gym-hanoi==0.0.2) (2018.1.18)\n",
 20 |       "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/chardet-3.0.4-py3.6.egg (from requests>=2.0->gym->gym-hanoi==0.0.2) (3.0.4)\n",
 21 |       "Requirement already satisfied: idna<2.9,>=2.5 in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/idna-2.8-py3.6.egg (from requests>=2.0->gym->gym-hanoi==0.0.2) (2.8)\n",
 22 |       "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/urllib3-1.24.1-py3.6.egg (from requests>=2.0->gym->gym-hanoi==0.0.2) (1.24.1)\n",
 23 |       "Requirement already satisfied: future in /Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/future-0.17.1-py3.6.egg (from pyglet>=1.2.0->gym->gym-hanoi==0.0.2) (0.17.1)\n",
 24 |       "Installing collected packages: gym-hanoi\n",
 25 |       "  Found existing installation: gym-hanoi 0.0.2\n",
 26 |       "    Uninstalling gym-hanoi-0.0.2:\n",
 27 |       "      Successfully uninstalled gym-hanoi-0.0.2\n",
 28 |       "  Running setup.py develop for gym-hanoi\n",
 29 |       "Successfully installed gym-hanoi\n"
 30 |      ]
 31 |     }
 32 |    ],
 33 |    "source": [
 34 |     "!pip install -e ."
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 3,
 40 |    "metadata": {},
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "# Run unit tests\n",
 44 |     "# !python gym_hanoi/tests/test.py"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 4,
 50 |    "metadata": {},
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "from IPython.display import set_matplotlib_formats\n",
 54 |     "set_matplotlib_formats('retina')\n",
 55 |     "\n",
 56 |     "# Import the environment\n",
 57 |     "import gym\n",
 58 |     "import gym_hanoi\n",
 59 |     "\n",
 60 |     "# Import base modules\n",
 61 |     "import time\n",
 62 |     "import numpy as np\n",
 63 |     "import matplotlib.pyplot as plt"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 5,
 69 |    "metadata": {},
 70 |    "outputs": [
 71 |     {
 72 |      "name": "stdout",
 73 |      "output_type": "stream",
 74 |      "text": [
 75 |       "Hanoi Environment Parameters have been set to:\n",
 76 |       "\t Number of Disks: 6\n",
 77 |       "\t Transition Failure Probability: 0.1\n",
 78 |       "State Transition:\n",
 79 |       "--> Disk 1 moves from pole 0 to 1\n",
 80 |       "--> Disk 2 moves from pole 0 to 0\n",
 81 |       "--> Disk 3 moves from pole 0 to 0\n",
 82 |       "--> Disk 4 moves from pole 0 to 0\n",
 83 |       "--> Disk 5 moves from pole 0 to 0\n",
 84 |       "--> Disk 6 moves from pole 0 to 0\n"
 85 |      ]
 86 |     },
 87 |     {
 88 |      "name": "stderr",
 89 |      "output_type": "stream",
 90 |      "text": [
 91 |       "/Users/rtl/anaconda2/envs/AG/lib/python3.6/site-packages/gym-0.10.9-py3.6.egg/gym/envs/registration.py:14: PkgResourcesDeprecationWarning: Parameters to load are deprecated.  Call .resolve and .require separately.\n",
 92 |       "  result = entry_point.load(False)\n"
 93 |      ]
 94 |     }
 95 |    ],
 96 |    "source": [
 97 |     "# Make the environment and change the base params\n",
 98 |     "num_disks = 6\n",
 99 |     "env_noise = 0.1\n",
100 |     "\n",
101 |     "env = gym.make(\"Hanoi-v0\")\n",
102 |     "env.set_env_parameters(num_disks, env_noise, verbose=True)\n",
103 |     "state = env.reset()\n",
104 |     "next_state, reward, done, _ = env.step(0)\n",
105 |     "\n",
106 |     "print(\"State Transition:\")\n",
107 |     "for i in range(num_disks):\n",
108 |     "    print(\"--> Disk {} moves from pole {} to {}\".format(i+1, state[i], next_state[i]))"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 7,
114 |    "metadata": {},
115 |    "outputs": [
116 |     {
117 |      "name": "stdout",
118 |      "output_type": "stream",
119 |      "text": [
120 |       "CPU times: user 11.3 ms, sys: 262 µs, total: 11.6 ms\n",
121 |       "Wall time: 11.4 ms\n",
122 |       "CPU times: user 201 ms, sys: 2.59 ms, total: 203 ms\n",
123 |       "Wall time: 203 ms\n",
124 |       "CPU times: user 5.72 s, sys: 20.5 ms, total: 5.74 s\n",
125 |       "Wall time: 5.75 s\n",
126 |       "CPU times: user 3min 50s, sys: 1.06 s, total: 3min 51s\n",
127 |       "Wall time: 3min 51s\n"
128 |      ]
129 |     }
130 |    ],
131 |    "source": [
132 |     "# Check how much time it takes to compute the movability map\n",
133 |     "for num_disks in range(3, 7):\n",
134 |     "    env = gym.make(\"Hanoi-v0\")\n",
135 |     "    env.set_env_parameters(num_disks, verbose=False)\n",
136 |     "    state = env.reset()\n",
137 |     "    %time env.get_movability_map()"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": null,
143 |    "metadata": {},
144 |    "outputs": [],
145 |    "source": []
146 |   }
147 |  ],
148 |  "metadata": {
149 |   "kernelspec": {
150 |    "display_name": "Python3 (AG)",
151 |    "language": "python",
152 |    "name": "ag"
153 |   },
154 |   "language_info": {
155 |    "codemirror_mode": {
156 |     "name": "ipython",
157 |     "version": 3
158 |    },
159 |    "file_extension": ".py",
160 |    "mimetype": "text/x-python",
161 |    "name": "python",
162 |    "nbconvert_exporter": "python",
163 |    "pygments_lexer": "ipython3",
164 |    "version": "3.6.6"
165 |   }
166 |  },
167 |  "nbformat": 4,
168 |  "nbformat_minor": 2
169 | }
170 | 


--------------------------------------------------------------------------------
/gym_hanoi/envs/hanoi_env.py:
--------------------------------------------------------------------------------
  1 | import gym
  2 | from gym import error, spaces, utils
  3 | from gym.utils import seeding
  4 | 
  5 | import random
  6 | import itertools
  7 | import numpy as np
  8 | 
  9 | 
 10 | class HanoiEnv(gym.Env):
 11 |     metadata = {'render.modes': ['human']}
 12 | 
 13 |     def __init__(self):
 14 |         self.num_disks = 4
 15 |         self.env_noise = 0
 16 |         self.action_space = spaces.Discrete(6)
 17 |         self.observation_space = spaces.Tuple(self.num_disks*(spaces.Discrete(3),))
 18 | 
 19 |         self.current_state = None
 20 |         self.goal_state = self.num_disks*(2,)
 21 | 
 22 |         self.done = None
 23 |         self.ACTION_LOOKUP = {0 : "(0,1) - top disk of pole 0 to top of pole 1 ",
 24 |                               1 : "(0,2) - top disk of pole 0 to top of pole 2 ",
 25 |                               2 : "(1,0) - top disk of pole 1 to top of pole 0",
 26 |                               3 : "(1,2) - top disk of pole 1 to top of pole 2",
 27 |                               4 : "(2,0) - top disk of pole 2 to top of pole 0",
 28 |                               5 : "(2,1) - top disk of pole 2 to top of pole 1"}
 29 | 
 30 |     def step(self, action):
 31 |         """
 32 |         * Inputs:
 33 |             - action: integer from 0 to 5 (see ACTION_LOOKUP)
 34 |         * Outputs:
 35 |             - current_state: state after transition
 36 |             - reward: reward from transition
 37 |             - done: episode state
 38 |             - info: dict of booleans (noisy?/invalid action?)
 39 |         0. Check if transition is noisy or not
 40 |         1. Transform action (0 to 5 integer) to tuple move - see Lookup
 41 |         2. Check if move is allowed
 42 |         3. If it is change corresponding entry | If not return same state
 43 |         4. Check if episode completed and return
 44 |         """
 45 |         if self.done:
 46 |             raise RuntimeError("Episode has finished. Call env.reset() to start a new episode.")
 47 | 
 48 |         info = {"transition_failure": False,
 49 |                 "invalid_action": False}
 50 | 
 51 |         if self.env_noise > 0:
 52 |             r_num = random.random()
 53 |             if r_num <= self.env_noise:
 54 |                 action = random.randint(0, self.action_space.n-1)
 55 |                 info["transition_failure"] = True
 56 | 
 57 |         move = action_to_move[action]
 58 | 
 59 |         if self.move_allowed(move):
 60 |             disk_to_move = min(self.disks_on_peg(move[0]))
 61 |             moved_state = list(self.current_state)
 62 |             moved_state[disk_to_move] = move[1]
 63 |             self.current_state = tuple(moved_state)
 64 |         else:
 65 |             info["invalid_action"] = True
 66 | 
 67 |         if self.current_state == self.goal_state:
 68 |             reward = 100
 69 |             self.done = True
 70 |         elif info["invalid_action"] == True:
 71 |             reward = -1
 72 |         else:
 73 |             reward = 0
 74 | 
 75 |         return self.current_state, reward, self.done, info
 76 | 
 77 |     def disks_on_peg(self, peg):
 78 |         """
 79 |         * Inputs:
 80 |             - peg: pole to check how many/which disks are in it
 81 |         * Outputs:
 82 |             - list of disk numbers that are allocated on pole
 83 |         """
 84 |         return [disk for disk in range(self.num_disks) if self.current_state[disk] == peg]
 85 | 
 86 |     def move_allowed(self, move):
 87 |         """
 88 |         * Inputs:
 89 |             - move: tuple of state transition (see ACTION_LOOKUP)
 90 |         * Outputs:
 91 |             - boolean indicating whether action is allowed from state!
 92 |         move[0] - peg from which we want to move disc
 93 |         move[1] - peg we want to move disc to
 94 |         Allowed if:
 95 |             * discs_to is empty (no disc of peg) set to true
 96 |             * Smallest disc on target pole larger than smallest on prev
 97 |         """
 98 |         disks_from = self.disks_on_peg(move[0])
 99 |         disks_to = self.disks_on_peg(move[1])
100 | 
101 |         if disks_from:
102 |             return (min(disks_to) > min(disks_from)) if disks_to else True
103 |         else:
104 |             return False
105 | 
106 |     def reset(self):
107 |         self.current_state = self.num_disks * (0,)
108 |         self.done = False
109 |         return self.current_state
110 | 
111 |     def render(self, mode='human', close=False):
112 |         return
113 | 
114 |     def set_env_parameters(self, num_disks=4, env_noise=0, verbose=True):
115 |         self.num_disks = num_disks
116 |         self.env_noise = env_noise
117 |         self.observation_space = spaces.Tuple(self.num_disks*(spaces.Discrete(3),))
118 |         self.goal_state = self.num_disks*(2,)
119 | 
120 |         if verbose:
121 |             print("Hanoi Environment Parameters have been set to:")
122 |             print("\t Number of Disks: {}".format(self.num_disks))
123 |             print("\t Transition Failure Probability: {}".format(self.env_noise))
124 | 
125 |     def get_movability_map(self, fill=False):
126 |         # Initialize movability map
127 |         mov_map = np.zeros(self.num_disks*(3, ) + (6,))
128 | 
129 |         if fill:
130 |             # Get list of all states as tuples
131 |             id_list = self.num_disks*[0] + self.num_disks*[1] + self.num_disks*[2]
132 |             states = list(itertools.permutations(id_list, self.num_disks))
133 | 
134 |             for state in states:
135 |                 for action in range(6):
136 |                     move = action_to_move[action]
137 |                     disks_from = []
138 |                     disks_to = []
139 |                     for d in range(self.num_disks):
140 |                         if state[d] == move[0]: disks_from.append(d)
141 |                         elif state[d] == move[1]: disks_to.append(d)
142 | 
143 |                     if disks_from: valid = (min(disks_to) > min(disks_from)) if disks_to else True
144 |                     else: valid = False
145 | 
146 |                     if not valid: mov_map[state][action] = -np.inf
147 | 
148 |                     move_from = [m[0] for m in action_to_move]
149 |                     move_to = [m[1] for m in action_to_move]
150 | 
151 |         # # Try to get rid of action loop - vectorize...
152 |         # for state in states:
153 |         #     s = np.array(state)
154 |         #     disks_from = []
155 |         #     disks_to = []
156 |         #
157 |         #     for d in range(self.num_disks):
158 |         #         a_from = [a for a, v in enumerate(move_from) if v == s[d]]
159 |         #         a_to = [a for a, v in enumerate(move_to) if v == s[d]]
160 |         #
161 |         #         if disks_from:
162 |         #             valid = (min(disks_to) > min(disks_from)) if disks_to else True
163 |         #         else:
164 |         #             valid = False
165 |         #
166 |         #         if not valid:
167 |         #             mov_map[state][action] = -np.inf
168 |         return mov_map
169 | 
170 | 
171 | action_to_move = [(0, 1), (0, 2), (1, 0),
172 |                   (1, 2), (2, 0), (2, 1)]
173 | 
174 | # action_to_move = {0: (0, 1), 1: (0, 2), 2: (1, 0),
175 | #                   3: (1, 2), 4: (2, 0), 5: (2, 1)}
176 | 


--------------------------------------------------------------------------------