├── .gitignore
├── training.png
├── screenshot.gif
├── gym_moving_dot
│   ├── envs
│   │   ├── __init__.py
│   │   ├── moving_dot_env_test.py
│   │   └── moving_dot_env.py
│   └── __init__.py
├── requirements.txt
├── setup.py
└── README.md

/.gitignore:
--------------------------------------------------------------------------------
.idea
venv

__pycache__

--------------------------------------------------------------------------------
/training.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mrahtz/gym-moving-dot/HEAD/training.png

--------------------------------------------------------------------------------
/screenshot.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mrahtz/gym-moving-dot/HEAD/screenshot.gif

--------------------------------------------------------------------------------
/gym_moving_dot/envs/__init__.py:
--------------------------------------------------------------------------------
from gym_moving_dot.envs.moving_dot_env import MovingDotDiscreteEnv, MovingDotContinuousEnv

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
cloudpickle==1.2.2
future==0.18.2
gym==0.15.3
numpy==1.17.3
pyglet==1.3.2
scipy==1.3.1
six==1.12.0

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup

setup(name='gym_moving_dot',
      version='2.0',
      packages=['gym_moving_dot', 'gym_moving_dot.envs'],
      install_requires=['gym', 'numpy']
      )

--------------------------------------------------------------------------------
/gym_moving_dot/__init__.py:
--------------------------------------------------------------------------------
from gym.envs.registration import register
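
# Note: each plain ID and its NoFrameskip twin below point at the same class;
# the environment never applies frameskip either way. The NoFrameskip aliases
# let Atari-style tooling that expects a NoFrameskip variant (e.g. the
# wrappers in OpenAI baselines) treat this env as a drop-in ALE substitute.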

register(
    id='MovingDotDiscreteNoFrameskip-v0',
    entry_point='gym_moving_dot.envs.moving_dot_env:MovingDotDiscreteEnv'
)

register(
    id='MovingDotDiscrete-v0',
    entry_point='gym_moving_dot.envs.moving_dot_env:MovingDotDiscreteEnv'
)

register(
    id='MovingDotContinuousNoFrameskip-v0',
    entry_point='gym_moving_dot.envs.moving_dot_env:MovingDotContinuousEnv'
)

register(
    id='MovingDotContinuous-v0',
    entry_point='gym_moving_dot.envs.moving_dot_env:MovingDotContinuousEnv'
)

--------------------------------------------------------------------------------
/gym_moving_dot/envs/moving_dot_env_test.py:
--------------------------------------------------------------------------------
"""
Test all implemented envs over a small number of steps.
"""

import gym
import gym_moving_dot

ENVS = ["MovingDotDiscrete-v0",
        "MovingDotDiscreteNoFrameskip-v0",
        "MovingDotContinuous-v0",
        "MovingDotContinuousNoFrameskip-v0"]

for env_name in ENVS:
    print("=== Test: {} ===".format(env_name))

    env = gym.make(env_name)
    env.random_start = False

    env.reset()

    for i in range(3):
        a = env.action_space.sample()
        o, r, d, info = env.step(a)
        print("Obs shape: {}, Action: {}, Reward: {}, Done flag: {}, Info: {}".format(o.shape, a, r, d, info))

    env.close()
    del env

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# gym-moving-dot

A simple environment for OpenAI gym consisting of a white dot moving around in
a black square, designed as a simple test environment for reinforcement
learning experiments.

![](screenshot.gif)

Observations are given as a 210 x 160 pixel image with three channels (red,
green and blue), the same size as Atari environments. The white dot has pixel
values (255, 255, 255), while the black square has pixel values (0, 0, 0).

Possible actions are:
- Discrete `action_space`
  * 0: do nothing
  * 1: move down
  * 2: move right
  * 3: move up
  * 4: move left
- Continuous `action_space`
  * Action: a 1 x 2 vector [move_on_x, move_on_y]
  * Range: -1 <= move_on_x <= 1, -1 <= move_on_y <= 1
  * Rules (where `threshold` is the env's `moving_thd` parameter, 0.1 by default):
    ```python
    # Rules for executing an action
    new_x = original_x_pos + 1 if move_on_x >= threshold else original_x_pos - 1
    new_y = original_y_pos + 1 if move_on_y >= threshold else original_y_pos - 1
    ```

Rewards are given based on how far the dot is from the centre:
* If the dot moves closer to the centre, it receives reward +1.
* If the dot moves further away from the centre, it receives reward -1.
* If the dot stays the same distance from the centre, it receives reward 0.
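
The same rule as a minimal sketch (the helper name `reward` is purely
illustrative; the actual check lives in the `step` method in
`moving_dot_env.py`, which hard-codes the centre at (80, 105)):

```python
import numpy as np

def reward(prev_pos, new_pos, centre=np.array([80, 105])):
    """+1 for moving closer to the centre, -1 for moving away, 0 otherwise."""
    before = np.linalg.norm(np.asarray(prev_pos) - centre)
    after = np.linalg.norm(np.asarray(new_pos) - centre)
    if after < before:
        return 1
    if after > before:
        return -1
    return 0
```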
4 | """ 5 | 6 | import gym 7 | from gym import spaces 8 | from gym.utils import seeding 9 | import numpy as np 10 | 11 | 12 | class ALE(object): 13 | def __init__(self): 14 | self.lives = lambda: 0 15 | 16 | 17 | class MovingDotEnv(gym.Env): 18 | """ Base class for MovingDot game """ 19 | metadata = {'render.modes': ['human']} 20 | 21 | def __init__(self): 22 | super(gym.Env, self).__init__() 23 | 24 | # Environment parameters 25 | self.dot_size = [2, 2] 26 | self.random_start = True 27 | self.max_steps = 1000 28 | 29 | # environment setup 30 | self.observation_space = spaces.Box(low=0, 31 | high=255, 32 | shape=(210, 160, 3)) 33 | self.centre = np.array([80, 105]) 34 | self.viewer = None 35 | 36 | self.seed() 37 | 38 | # Needed by atari_wrappers in OpenAI baselines 39 | self.ale = ALE() 40 | seed = None 41 | self.np_random, _ = seeding.np_random(seed) 42 | 43 | self.reset() 44 | 45 | def reset(self): 46 | if self.random_start: 47 | x = self.np_random.randint(low=0, high=160) 48 | y = self.np_random.randint(low=0, high=210) 49 | self.pos = [x, y] 50 | else: 51 | self.pos = [0, 0] 52 | self.steps = 0 53 | ob = self._get_ob() 54 | return ob 55 | 56 | # This is important because for e.g. A3C each worker should be exploring 57 | # the environment differently, therefore seeds the random number generator 58 | # of each environment differently. (This influences the random start 59 | # location.) 60 | def seed(self, seed=None): 61 | self.np_random, seed = seeding.np_random(seed) 62 | return [seed] 63 | 64 | def _get_ob(self): 65 | ob = np.zeros((210, 160, 3), dtype=np.uint8) 66 | x = self.pos[0] 67 | y = self.pos[1] 68 | w = self.dot_size[0] 69 | h = self.dot_size[1] 70 | ob[y - h:y + h, x - w:x + w, :] = 255 71 | return ob 72 | 73 | def get_action_meanings(self): 74 | return ['NOOP', 'DOWN', 'RIGHT', 'UP', 'LEFT'] 75 | 76 | def step(self, action): 77 | prev_pos = self.pos[:] 78 | 79 | self._update_pos(action) 80 | 81 | ob = self._get_ob() 82 | 83 | self.steps += 1 84 | if self.steps < self.max_steps: 85 | episode_over = False 86 | else: 87 | episode_over = True 88 | 89 | dist1 = np.linalg.norm(prev_pos - self.centre) 90 | dist2 = np.linalg.norm(self.pos - self.centre) 91 | if dist2 < dist1: 92 | reward = 1 93 | elif dist2 == dist1: 94 | reward = 0 95 | else: 96 | reward = -1 97 | 98 | return ob, reward, episode_over, {} 99 | 100 | def _update_pos(self, action): 101 | """ subclass is supposed to implement the logic 102 | to update the frame given an action at t """ 103 | raise NotImplementedError 104 | 105 | # Based on gym's atari_env.py 106 | def render(self, mode='human', close=False): 107 | if close: 108 | if self.viewer is not None: 109 | self.viewer.close() 110 | self.viewer = None 111 | return 112 | 113 | # We only import this here in case we're running on a headless server 114 | from gym.envs.classic_control import rendering 115 | assert mode == 'human', "MovingDot only supports human render mode" 116 | img = self._get_ob() 117 | if self.viewer is None: 118 | self.viewer = rendering.SimpleImageViewer() 119 | self.viewer.imshow(img) 120 | 121 | 122 | class MovingDotDiscreteEnv(MovingDotEnv): 123 | """ Discrete Action MovingDot env """ 124 | def __init__(self): 125 | super(MovingDotDiscreteEnv, self).__init__() 126 | self.action_space = spaces.Discrete(5) 127 | 128 | def _update_pos(self, action): 129 | assert action >= 0 and action <= 4 130 | 131 | if action == 0: 132 | # NOOP 133 | pass 134 | elif action == 1: 135 | self.pos[1] += 1 136 | elif action == 2: 137 | self.pos[0] += 1 138 | elif 

## Update
- 1/11/2019:
  - update to be compatible with the latest gym package
  - add the continuous `action_space` version
- 16/12/2019:
  - split the existing class into a parent class and subclasses

--------------------------------------------------------------------------------
/gym_moving_dot/envs/moving_dot_env.py:
--------------------------------------------------------------------------------
"""
A simple OpenAI gym environment consisting of a white dot moving in a black
square.
"""

import gym
from gym import spaces
from gym.utils import seeding
import numpy as np


class ALE(object):
    """Minimal stub of the ALE interface expected by Atari wrappers."""
    def __init__(self):
        self.lives = lambda: 0


class MovingDotEnv(gym.Env):
    """ Base class for MovingDot game """
    metadata = {'render.modes': ['human']}

    def __init__(self):
        super(MovingDotEnv, self).__init__()

        # Environment parameters
        self.dot_size = [2, 2]
        self.random_start = True
        self.max_steps = 1000

        # Environment setup
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(210, 160, 3),
                                            dtype=np.uint8)
        self.centre = np.array([80, 105])
        self.viewer = None

        self.seed()

        # Needed by atari_wrappers in OpenAI baselines
        self.ale = ALE()

        self.reset()

    def reset(self):
        if self.random_start:
            # Keep the dot fully inside the frame, matching the clipping
            # applied in _update_pos
            x = self.np_random.randint(low=self.dot_size[0],
                                       high=160 - self.dot_size[0])
            y = self.np_random.randint(low=self.dot_size[1],
                                       high=210 - self.dot_size[1])
            self.pos = [x, y]
        else:
            self.pos = [0, 0]
        self.steps = 0
        ob = self._get_ob()
        return ob

    # Seeding matters because with e.g. A3C each worker should explore the
    # environment differently, so each environment's random number generator
    # is seeded differently. (This influences the random start location.)
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _get_ob(self):
        ob = np.zeros((210, 160, 3), dtype=np.uint8)
        x = self.pos[0]
        y = self.pos[1]
        w = self.dot_size[0]
        h = self.dot_size[1]
        ob[y - h:y + h, x - w:x + w, :] = 255
        return ob

    def get_action_meanings(self):
        return ['NOOP', 'DOWN', 'RIGHT', 'UP', 'LEFT']

    def step(self, action):
        prev_pos = self.pos[:]

        self._update_pos(action)

        ob = self._get_ob()

        self.steps += 1
        episode_over = self.steps >= self.max_steps

        dist1 = np.linalg.norm(prev_pos - self.centre)
        dist2 = np.linalg.norm(self.pos - self.centre)
        if dist2 < dist1:
            reward = 1
        elif dist2 == dist1:
            reward = 0
        else:
            reward = -1

        return ob, reward, episode_over, {}

    def _update_pos(self, action):
        """ Subclasses implement the logic that updates the dot's
        position given an action. """
        raise NotImplementedError

    # Based on gym's atari_env.py
    def render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        # We only import this here in case we're running on a headless server
        from gym.envs.classic_control import rendering
        assert mode == 'human', "MovingDot only supports human render mode"
        img = self._get_ob()
        if self.viewer is None:
            self.viewer = rendering.SimpleImageViewer()
        self.viewer.imshow(img)


class MovingDotDiscreteEnv(MovingDotEnv):
    """ Discrete action MovingDot env """
    def __init__(self):
        super(MovingDotDiscreteEnv, self).__init__()
        self.action_space = spaces.Discrete(5)

    def _update_pos(self, action):
        assert 0 <= action <= 4

        if action == 0:
            # NOOP
            pass
        elif action == 1:
            self.pos[1] += 1
        elif action == 2:
            self.pos[0] += 1
        elif action == 3:
            self.pos[1] -= 1
        elif action == 4:
            self.pos[0] -= 1
        self.pos[0] = np.clip(self.pos[0],
                              self.dot_size[0], 159 - self.dot_size[0])
        self.pos[1] = np.clip(self.pos[1],
                              self.dot_size[1], 209 - self.dot_size[1])


class MovingDotContinuousEnv(MovingDotEnv):
    """ Continuous action MovingDot env """
    def __init__(self, low=-1, high=1, moving_thd=0.1):  # moving_thd is empirically determined
        super(MovingDotContinuousEnv, self).__init__()

        self._high = high
        self._low = low
        self._moving_thd = moving_thd  # decides whether the dot moves; see _update_pos below
        self.action_space = spaces.Box(low=low, high=high, shape=(2,), dtype=np.float32)

    def _update_pos(self, action):
        _x, _y = action
        assert self._low <= _x <= self._high, \
            "movement along the x-axis has to fall between {} and {}".format(self._low, self._high)
        assert self._low <= _y <= self._high, \
            "movement along the y-axis has to fall between {} and {}".format(self._low, self._high)

        # The action values are continuous, so we threshold them: the dot
        # moves +1 along an axis if that component is >= moving_thd,
        # otherwise -1 (it never stands still along an axis).
        new_x = self.pos[0] + 1 if _x >= self._moving_thd else self.pos[0] - 1
        new_y = self.pos[1] + 1 if _y >= self._moving_thd else self.pos[1] - 1

        self.pos[0] = np.clip(new_x,
                              self.dot_size[0], 159 - self.dot_size[0])
        self.pos[1] = np.clip(new_y,
                              self.dot_size[1], 209 - self.dot_size[1])

--------------------------------------------------------------------------------