├── tests
├── __init__.py
└── gym
│ ├── __init__.py
│ ├── envs
│ ├── __init__.py
│ └── robotics
│ │ ├── __init__.py
│ │ └── hand
│ │ ├── __init__.py
│ │ ├── test_reach.py
│ │ ├── test_manipulate.py
│ │ └── test_manipulate_touch_sensors.py
│ └── wrappers
│ └── __init__.py
├── .dockerignore
├── gym
├── envs
│ ├── tests
│ │ ├── __init__.py
│ │ ├── rollout.json
│ │ ├── test_kellycoinflip.py
│ │ ├── test_frozenlake_dfs.py
│ │ ├── spec_list.py
│ │ ├── test_registration.py
│ │ ├── test_envs.py
│ │ ├── test_determinism.py
│ │ └── test_mujoco_v2_to_v3_conversion.py
│ ├── algorithmic
│ │ ├── tests
│ │ │ └── __init__.py
│ │ ├── __init__.py
│ │ ├── copy_.py
│ │ ├── reverse.py
│ │ ├── repeat_copy.py
│ │ ├── duplicated_input.py
│ │ └── reversed_addition.py
│ ├── robotics
│ │ ├── assets
│ │ │ ├── stls
│ │ │ │ ├── .get
│ │ │ │ ├── hand
│ │ │ │ │ ├── F1.stl
│ │ │ │ │ ├── F2.stl
│ │ │ │ │ ├── F3.stl
│ │ │ │ │ ├── TH1_z.stl
│ │ │ │ │ ├── TH2_z.stl
│ │ │ │ │ ├── TH3_z.stl
│ │ │ │ │ ├── palm.stl
│ │ │ │ │ ├── wrist.stl
│ │ │ │ │ ├── knuckle.stl
│ │ │ │ │ ├── lfmetacarpal.stl
│ │ │ │ │ ├── forearm_electric.stl
│ │ │ │ │ └── forearm_electric_cvx.stl
│ │ │ │ └── fetch
│ │ │ │ │ ├── estop_link.stl
│ │ │ │ │ ├── gripper_link.stl
│ │ │ │ │ ├── laser_link.stl
│ │ │ │ │ ├── torso_fixed_link.stl
│ │ │ │ │ ├── base_link_collision.stl
│ │ │ │ │ ├── bellows_link_collision.stl
│ │ │ │ │ ├── l_wheel_link_collision.stl
│ │ │ │ │ ├── r_wheel_link_collision.stl
│ │ │ │ │ ├── head_pan_link_collision.stl
│ │ │ │ │ ├── head_tilt_link_collision.stl
│ │ │ │ │ ├── elbow_flex_link_collision.stl
│ │ │ │ │ ├── forearm_roll_link_collision.stl
│ │ │ │ │ ├── shoulder_pan_link_collision.stl
│ │ │ │ │ ├── torso_lift_link_collision.stl
│ │ │ │ │ ├── wrist_flex_link_collision.stl
│ │ │ │ │ ├── wrist_roll_link_collision.stl
│ │ │ │ │ ├── shoulder_lift_link_collision.stl
│ │ │ │ │ └── upperarm_roll_link_collision.stl
│ │ │ ├── textures
│ │ │ │ ├── block.png
│ │ │ │ └── block_hidden.png
│ │ │ ├── fetch
│ │ │ │ ├── reach.xml
│ │ │ │ ├── push.xml
│ │ │ │ ├── slide.xml
│ │ │ │ └── pick_and_place.xml
│ │ │ └── hand
│ │ │ │ ├── reach.xml
│ │ │ │ ├── shared_asset.xml
│ │ │ │ ├── manipulate_pen.xml
│ │ │ │ ├── manipulate_pen_touch_sensors.xml
│ │ │ │ ├── manipulate_egg.xml
│ │ │ │ ├── manipulate_block.xml
│ │ │ │ ├── manipulate_egg_touch_sensors.xml
│ │ │ │ └── manipulate_block_touch_sensors.xml
│ │ ├── fetch
│ │ │ ├── __init__.py
│ │ │ ├── reach.py
│ │ │ ├── push.py
│ │ │ ├── pick_and_place.py
│ │ │ └── slide.py
│ │ ├── hand
│ │ │ └── __init__.py
│ │ ├── __init__.py
│ │ ├── hand_env.py
│ │ └── README.md
│ ├── atari
│ │ └── __init__.py
│ ├── classic_control
│ │ ├── assets
│ │ │ └── clockwise.png
│ │ └── __init__.py
│ ├── unittest
│ │ └── __init__.py
│ ├── box2d
│ │ ├── __init__.py
│ │ └── test_lunar_lander.py
│ ├── README.md
│ ├── toy_text
│ │ ├── __init__.py
│ │ ├── roulette.py
│ │ ├── discrete.py
│ │ ├── nchain.py
│ │ └── hotter_colder.py
│ └── mujoco
│ │ ├── __init__.py
│ │ ├── inverted_pendulum.py
│ │ ├── swimmer.py
│ │ ├── half_cheetah.py
│ │ ├── assets
│ │ ├── inverted_pendulum.xml
│ │ ├── point.xml
│ │ ├── inverted_double_pendulum.xml
│ │ ├── swimmer.xml
│ │ ├── reacher.xml
│ │ └── hopper.xml
│ │ ├── walker2d.py
│ │ ├── hopper.py
│ │ ├── inverted_double_pendulum.py
│ │ ├── reacher.py
│ │ ├── ant.py
│ │ ├── humanoidstandup.py
│ │ ├── pusher.py
│ │ ├── humanoid.py
│ │ ├── thrower.py
│ │ ├── striker.py
│ │ └── half_cheetah_v3.py
├── spaces
│ ├── tests
│ │ └── __init__.py
│ ├── __init__.py
│ ├── multi_binary.py
│ ├── discrete.py
│ ├── space.py
│ ├── tuple.py
│ ├── multi_discrete.py
│ └── utils.py
├── vector
│ ├── tests
│ │ ├── __init__.py
│ │ ├── test_vector_env.py
│ │ ├── test_spaces.py
│ │ ├── utils.py
│ │ └── test_sync_vector_env.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── misc.py
│ │ └── spaces.py
│ └── __init__.py
├── wrappers
│ ├── tests
│ │ └── __init__.py
│ ├── monitoring
│ │ ├── __init__.py
│ │ └── tests
│ │ │ ├── __init__.py
│ │ │ ├── helpers.py
│ │ │ └── test_video_recorder.py
│ ├── clip_action.py
│ ├── flatten_observation.py
│ ├── test_clip_action.py
│ ├── transform_reward.py
│ ├── transform_observation.py
│ ├── __init__.py
│ ├── test_rescale_action.py
│ ├── test_record_episode_statistics.py
│ ├── test_resize_observation.py
│ ├── test_transform_observation.py
│ ├── resize_observation.py
│ ├── time_limit.py
│ ├── gray_scale_observation.py
│ ├── README.md
│ ├── test_flatten_observation.py
│ ├── test_frame_stack.py
│ ├── rescale_action.py
│ ├── test_gray_scale_observation.py
│ ├── record_episode_statistics.py
│ ├── test_transform_reward.py
│ ├── test_atari_preprocessing.py
│ ├── filter_observation.py
│ └── test_filter_observation.py
├── version.py
├── tests
│ └── test_core.py
├── utils
│ ├── __init__.py
│ ├── tests
│ │ ├── test_seeding.py
│ │ └── test_atexit.py
│ ├── json_utils.py
│ ├── colorize.py
│ ├── ezpickle.py
│ ├── atomic_write.py
│ └── closer.py
├── __init__.py
└── logger.py
├── examples
├── scripts
│ ├── list_envs
│ └── sim_env
└── agents
│ ├── _policies.py
│ ├── random_agent.py
│ └── keyboard_agent.py
├── docs
├── misc.md
├── wrappers.md
├── readme.md
├── creating-environments.md
└── agents.md
├── .gitignore
├── bin
├── docker_entrypoint
└── render.py
├── .travis.yml
├── CODE_OF_CONDUCT.rst
├── py.Dockerfile
├── LICENSE.md
├── .github
└── stale.yml
└── setup.py
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | .tox
2 |
--------------------------------------------------------------------------------
/tests/gym/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/envs/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/spaces/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/vector/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/gym/envs/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/envs/tests/rollout.json:
--------------------------------------------------------------------------------
1 | {}
--------------------------------------------------------------------------------
/gym/wrappers/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/gym/wrappers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/.get:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/envs/robotics/hand/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/version.py:
--------------------------------------------------------------------------------
1 | VERSION = '0.15.4'
2 |
--------------------------------------------------------------------------------
/gym/wrappers/monitoring/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/gym/envs/robotics/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/wrappers/monitoring/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/gym/envs/robotics/hand/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/envs/atari/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.atari.atari_env import AtariEnv
2 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/F1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/F1.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/F2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/F2.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/F3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/F3.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/TH1_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/TH1_z.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/TH2_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/TH2_z.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/TH3_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/TH3_z.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/palm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/palm.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/wrist.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/wrist.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/textures/block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/textures/block.png
--------------------------------------------------------------------------------
/gym/envs/classic_control/assets/clockwise.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/classic_control/assets/clockwise.png
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/knuckle.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/knuckle.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/estop_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/estop_link.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/gripper_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/gripper_link.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/laser_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/laser_link.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/textures/block_hidden.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/textures/block_hidden.png
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/forearm_electric.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/forearm_electric.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl
--------------------------------------------------------------------------------
/examples/scripts/list_envs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from gym import envs
3 | envids = [spec.id for spec in envs.registry.all()]
4 | for envid in sorted(envids):
5 | print(envid)
6 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl
--------------------------------------------------------------------------------
/gym/wrappers/monitoring/tests/helpers.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import shutil
3 | import tempfile
4 |
5 | @contextlib.contextmanager
6 | def tempdir():
7 | temp = tempfile.mkdtemp()
8 | yield temp
9 | shutil.rmtree(temp)
10 |
--------------------------------------------------------------------------------
/gym/envs/unittest/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.unittest.cube_crash import CubeCrash
2 | from gym.envs.unittest.cube_crash import CubeCrashSparse
3 | from gym.envs.unittest.cube_crash import CubeCrashScreenBecomesBlack
4 | from gym.envs.unittest.memorize_digits import MemorizeDigits
5 |
6 |
--------------------------------------------------------------------------------
/docs/misc.md:
--------------------------------------------------------------------------------
1 | # Miscellaneous
2 |
3 | Here is a collection of tools, libraries, APIs, tutorials, and other resources provided by the community to add value to the Gym ecosystem.
4 |
5 | ## OpenAIGym.jl
6 |
7 | A convenience wrapper of OpenAI Gym for the Julia language: [tbreloff/OpenAIGym.jl](https://github.com/tbreloff/OpenAIGym.jl)
--------------------------------------------------------------------------------
/gym/envs/algorithmic/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.algorithmic.copy_ import CopyEnv
2 | from gym.envs.algorithmic.repeat_copy import RepeatCopyEnv
3 | from gym.envs.algorithmic.duplicated_input import DuplicatedInputEnv
4 | from gym.envs.algorithmic.reverse import ReverseEnv
5 | from gym.envs.algorithmic.reversed_addition import ReversedAdditionEnv
6 |
--------------------------------------------------------------------------------
/gym/envs/box2d/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 | import Box2D
3 | from gym.envs.box2d.lunar_lander import LunarLander
4 | from gym.envs.box2d.lunar_lander import LunarLanderContinuous
5 | from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
6 | from gym.envs.box2d.car_racing import CarRacing
7 | except ImportError:
8 | Box2D = None
9 |
--------------------------------------------------------------------------------
/gym/envs/classic_control/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.classic_control.cartpole import CartPoleEnv
2 | from gym.envs.classic_control.mountain_car import MountainCarEnv
3 | from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv
4 | from gym.envs.classic_control.pendulum import PendulumEnv
5 | from gym.envs.classic_control.acrobot import AcrobotEnv
6 |
7 |
--------------------------------------------------------------------------------
/gym/tests/test_core.py:
--------------------------------------------------------------------------------
1 | from gym import core
2 |
3 | class ArgumentEnv(core.Env):
4 | calls = 0
5 |
6 | def __init__(self, arg):
7 | self.calls += 1
8 | self.arg = arg
9 |
10 | def test_env_instantiation():
11 | # This looks like a pretty trivial test, but given our usage of
12 | # __new__, it's worth having.
13 | env = ArgumentEnv('arg')
14 | assert env.arg == 'arg'
15 | assert env.calls == 1
16 |
--------------------------------------------------------------------------------
/gym/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """A set of common utilities used within the environments. These are
2 | not intended as API functions, and will not remain stable over time.
3 | """
4 |
5 | # These submodules should not have any import-time dependencies.
6 | # We want this since we use `utils` during our import-time sanity checks
7 | # that verify that our dependencies are actually present.
8 | from .colorize import colorize
9 | from .ezpickle import EzPickle
10 |
--------------------------------------------------------------------------------
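
The two helpers re-exported above are used throughout the environments in this dump (for example, the fetch environments call `utils.EzPickle.__init__(self)`). A minimal sketch of both; `MyEnv` and its `difficulty` argument are hypothetical:

```
from gym.utils import colorize, EzPickle

# colorize wraps a string in ANSI color codes for terminal output
print(colorize('training started', 'green'))

class MyEnv(EzPickle):
    # EzPickle records the constructor arguments so that an unpickled
    # copy is rebuilt by calling the constructor with the same arguments
    def __init__(self, difficulty=1):
        EzPickle.__init__(self, difficulty)
        self.difficulty = difficulty
```
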
/gym/envs/algorithmic/copy_.py:
--------------------------------------------------------------------------------
1 | """
2 | Task is to copy content from the input tape to
3 | the output tape. http://arxiv.org/abs/1511.07275
4 | """
5 | from gym.envs.algorithmic import algorithmic_env
6 |
7 |
8 | class CopyEnv(algorithmic_env.TapeAlgorithmicEnv):
9 | def __init__(self, base=5, chars=True):
10 | super(CopyEnv, self).__init__(base=base, chars=chars)
11 |
12 | def target_from_input_data(self, input_data):
13 | return input_data
14 |
--------------------------------------------------------------------------------
/gym/__init__.py:
--------------------------------------------------------------------------------
1 | import distutils.version
2 | import os
3 | import sys
4 | import warnings
5 |
6 | from gym import error
7 | from gym.version import VERSION as __version__
8 |
9 | from gym.core import Env, GoalEnv, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
10 | from gym.spaces import Space
11 | from gym.envs import make, spec, register
12 | from gym import logger
13 | from gym import vector
14 |
15 | __all__ = ["Env", "Space", "Wrapper", "make", "spec", "register"]
16 |
--------------------------------------------------------------------------------
/gym/wrappers/clip_action.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym import ActionWrapper
4 | from gym.spaces import Box
5 |
6 |
7 | class ClipAction(ActionWrapper):
8 | r"""Clip the continuous action within the valid bound. """
9 | def __init__(self, env):
10 | assert isinstance(env.action_space, Box)
11 | super(ClipAction, self).__init__(env)
12 |
13 | def action(self, action):
14 | return np.clip(action, self.action_space.low, self.action_space.high)
15 |
--------------------------------------------------------------------------------
/gym/utils/tests/test_seeding.py:
--------------------------------------------------------------------------------
1 | from gym import error
2 | from gym.utils import seeding
3 |
4 | def test_invalid_seeds():
5 | for seed in [-1, 'test']:
6 | try:
7 | seeding.np_random(seed)
8 | except error.Error:
9 | pass
10 | else:
11 | assert False, 'Invalid seed {} passed validation'.format(seed)
12 |
13 | def test_valid_seeds():
14 | for seed in [0, 1]:
15 | random, seed1 = seeding.np_random(seed)
16 | assert seed == seed1
17 |
--------------------------------------------------------------------------------
/gym/envs/README.md:
--------------------------------------------------------------------------------
1 | # Envs
2 |
3 | These are the core integrated environments. Note that we may later
4 | restructure any of the files, but will keep the environments available
5 | at the relevant package's top-level. So for example, you should access
6 | `AntEnv` as follows:
7 |
8 | ```
9 | # Will be supported in future releases
10 | from gym.envs import mujoco
11 | mujoco.AntEnv
12 | ```
13 |
14 | Rather than:
15 |
16 | ```
17 | # May break in future releases
18 | from gym.envs.mujoco import ant
19 | ant.AntEnv
20 | ```
--------------------------------------------------------------------------------
/gym/envs/algorithmic/reverse.py:
--------------------------------------------------------------------------------
1 | """
2 | Task is to reverse content over the input tape.
3 | http://arxiv.org/abs/1511.07275
4 | """
5 | from gym.envs.algorithmic import algorithmic_env
6 |
7 |
8 | class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv):
9 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
10 |
11 | def __init__(self, base=2):
12 | super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1)
13 | self.last = 50
14 |
15 | def target_from_input_data(self, input_str):
16 | return list(reversed(input_str))
17 |
--------------------------------------------------------------------------------
/gym/spaces/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.spaces.space import Space
2 | from gym.spaces.box import Box
3 | from gym.spaces.discrete import Discrete
4 | from gym.spaces.multi_discrete import MultiDiscrete
5 | from gym.spaces.multi_binary import MultiBinary
6 | from gym.spaces.tuple import Tuple
7 | from gym.spaces.dict import Dict
8 |
9 | from gym.spaces.utils import flatdim
10 | from gym.spaces.utils import flatten
11 | from gym.spaces.utils import unflatten
12 |
13 | __all__ = ["Space", "Box", "Discrete", "MultiDiscrete", "MultiBinary", "Tuple", "Dict", "flatdim", "flatten", "unflatten"]
14 |
--------------------------------------------------------------------------------
/tests/gym/envs/robotics/hand/test_reach.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | import pytest
4 |
5 | from gym import envs
6 | from gym.envs.tests.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE
7 |
8 |
9 | @pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE)
10 | def test_serialize_deserialize():
11 | env1 = envs.make('HandReach-v0', distance_threshold=1e-6)
12 | env1.reset()
13 | env2 = pickle.loads(pickle.dumps(env1))
14 |
15 | assert env1.distance_threshold == env2.distance_threshold, (
16 | env1.distance_threshold, env2.distance_threshold)
17 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.pyc
3 | *.py~
4 | .DS_Store
5 | .cache
6 | .pytest_cache/
7 |
8 | # Setuptools distribution and build folders.
9 | /dist/
10 | /build
11 |
12 | # Virtualenv
13 | /env
14 |
15 | # Python egg metadata, regenerated from source files by setuptools.
16 | /*.egg-info
17 |
18 | *.sublime-project
19 | *.sublime-workspace
20 |
21 | logs/
22 |
23 | .ipynb_checkpoints
24 | ghostdriver.log
25 |
26 | junk
27 | MUJOCO_LOG.txt
28 |
29 | rllab_mujoco
30 |
31 | tutorial/*.html
32 |
33 | # IDE files
34 | .eggs
35 | .tox
36 |
37 | # PyCharm project files
38 | .idea
39 | vizdoom.ini
40 |
--------------------------------------------------------------------------------
/gym/envs/tests/test_kellycoinflip.py:
--------------------------------------------------------------------------------
1 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv
2 |
3 |
4 | class TestKellyCoinflipEnv:
5 | @staticmethod
6 | def test_done_when_reaches_max_wealth():
7 | # https://github.com/openai/gym/issues/1266
8 | env = KellyCoinflipEnv()
9 | env.seed(1)
10 | env.reset()
11 | done = False
12 |
13 | while not done:
14 | action = int(env.wealth * 20) # bet 20% of the wealth
15 | observation, reward, done, info = env.step(action)
16 |
17 | assert env.wealth == env.max_wealth
18 |
--------------------------------------------------------------------------------
/gym/utils/tests/test_atexit.py:
--------------------------------------------------------------------------------
1 | from gym.utils.closer import Closer
2 |
3 | class Closeable(object):
4 | close_called = False
5 | def close(self):
6 | self.close_called = True
7 |
8 | def test_register_unregister():
9 | registry = Closer(atexit_register=False)
10 | c1 = Closeable()
11 | c2 = Closeable()
12 |
13 | assert not c1.close_called
14 | assert not c2.close_called
15 | registry.register(c1)
16 | id2 = registry.register(c2)
17 |
18 | registry.unregister(id2)
19 | registry.close()
20 | assert c1.close_called
21 | assert not c2.close_called
22 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/repeat_copy.py:
--------------------------------------------------------------------------------
1 | """
2 | Task is to copy content multiple times from the input tape to
3 | the output tape. http://arxiv.org/abs/1511.07275
4 | """
5 | from gym.envs.algorithmic import algorithmic_env
6 |
7 |
8 | class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv):
9 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
10 |
11 | def __init__(self, base=5):
12 | super(RepeatCopyEnv, self).__init__(base=base, chars=True)
13 | self.last = 50
14 |
15 | def target_from_input_data(self, input_data):
16 | return input_data + list(reversed(input_data)) + input_data
17 |
--------------------------------------------------------------------------------
/bin/docker_entrypoint:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # This script is the entrypoint for our Docker image.
3 |
4 | set -ex
5 |
6 | # Set up display; otherwise rendering will fail
7 | Xvfb -screen 0 1024x768x24 &
8 | export DISPLAY=:0
9 |
10 | # Wait for the file to come up
11 | display=0
12 | file="/tmp/.X11-unix/X$display"
13 | for i in $(seq 1 10); do
14 | if [ -e "$file" ]; then
15 | break
16 | fi
17 |
18 | echo "Waiting for $file to be created (try $i/10)"
19 | sleep "$i"
20 | done
21 | if ! [ -e "$file" ]; then
22 | echo "Timing out: $file was not created"
23 | exit 1
24 | fi
25 |
26 | exec "$@"
27 |
--------------------------------------------------------------------------------
/bin/render.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import argparse
3 | import gym
4 |
5 |
6 | parser = argparse.ArgumentParser(description='Renders a Gym environment for quick inspection.')
7 | parser.add_argument('env_id', type=str, help='the ID of the environment to be rendered (e.g. HalfCheetah-v1)')
8 | parser.add_argument('--step', type=int, default=1)
9 | args = parser.parse_args()
10 |
11 | env = gym.make(args.env_id)
12 | env.reset()
13 |
14 | step = 0
15 | while True:
16 | if args.step:
17 | env.step(env.action_space.sample())
18 | env.render()
19 | if step % 10 == 0:
20 | env.reset()
21 | step += 1
22 |
--------------------------------------------------------------------------------
/docs/wrappers.md:
--------------------------------------------------------------------------------
1 | # Wrappers
2 |
3 | ## Space Wrappers
4 | Wrappers that transform the observation and/or action space. These include:
5 | * Discretize (make a discrete version of a continuous space)
6 | * Flatten (put all actions/observations into a single dimension)
7 | * Rescale (rescale the range of values for continuous spaces)
8 |
9 | Learn more here: https://github.com/ngc92/space-wrappers
10 |
11 | ## Utility wrappers for Atari Games
12 | The baselines repository contains wrappers that are commonly used for Atari
13 | experiments.
14 | These can be found here: https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py
15 |
--------------------------------------------------------------------------------
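
The external packages linked in wrappers.md above follow the same env-in, env-out convention as the built-in `gym.wrappers`. A minimal sketch of that wrapping pattern, using two wrappers that ship in this repository (`RescaleAction`, `TransformReward`); the `Pendulum-v0` id and the 0.01 reward scale are only illustrative values:

```
import gym
from gym.wrappers import RescaleAction, TransformReward

env = gym.make('Pendulum-v0')                    # continuous action space
env = RescaleAction(env, -1.0, 1.0)              # rescale actions to [-1, 1]
env = TransformReward(env, lambda r: 0.01 * r)   # scale rewards by 0.01

obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
```
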
/gym/vector/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.vector.utils.misc import CloudpickleWrapper, clear_mpi_env_vars
2 | from gym.vector.utils.numpy_utils import concatenate, create_empty_array
3 | from gym.vector.utils.shared_memory import create_shared_memory, read_from_shared_memory, write_to_shared_memory
4 | from gym.vector.utils.spaces import _BaseGymSpaces, batch_space
5 |
6 | __all__ = [
7 | 'CloudpickleWrapper',
8 | 'clear_mpi_env_vars',
9 | 'concatenate',
10 | 'create_empty_array',
11 | 'create_shared_memory',
12 | 'read_from_shared_memory',
13 | 'write_to_shared_memory',
14 | '_BaseGymSpaces',
15 | 'batch_space'
16 | ]
17 |
--------------------------------------------------------------------------------
/gym/wrappers/flatten_observation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import gym.spaces as spaces
3 | from gym import ObservationWrapper
4 |
5 |
6 | class FlattenObservation(ObservationWrapper):
7 | r"""Observation wrapper that flattens the observation."""
8 | def __init__(self, env):
9 | super(FlattenObservation, self).__init__(env)
10 |
11 | flatdim = spaces.flatdim(env.observation_space)
12 | self.observation_space = spaces.Box(low=-float('inf'), high=float('inf'), shape=(flatdim,), dtype=np.float32)
13 |
14 | def observation(self, observation):
15 | return spaces.flatten(self.env.observation_space, observation)
16 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: xenial
2 | language: python
3 | python:
4 | - "3.7"
5 | services:
6 | - docker
7 | env:
8 | # - UBUNTU_VER=14.04 - problems with atari-py
9 | - PY_VER=2.7
10 | - PY_VER=3.5.6
11 | - PY_VER=3.6.8
12 | - PY_VER=3.7.3
13 |
14 | install: "" # so travis doesn't do pip install requirements.txt
15 | script:
16 | - docker build -f py.Dockerfile --build-arg MUJOCO_KEY=$MUJOCO_KEY --build-arg PYTHON_VER=$PY_VER -t gym-test .
17 | - docker run gym-test
18 |
19 | deploy:
20 | provider: pypi
21 | username: $TWINE_USERNAME
22 | password: $TWINE_PASSWORD
23 | on:
24 | tags: true
25 | condition: $PY_VER = 3.5.6
26 |
--------------------------------------------------------------------------------
/examples/agents/_policies.py:
--------------------------------------------------------------------------------
1 | # Support code for cem.py
2 |
3 | class BinaryActionLinearPolicy(object):
4 | def __init__(self, theta):
5 | self.w = theta[:-1]
6 | self.b = theta[-1]
7 | def act(self, ob):
8 | y = ob.dot(self.w) + self.b
9 | a = int(y < 0)
10 | return a
11 |
12 | class ContinuousActionLinearPolicy(object):
13 | def __init__(self, theta, n_in, n_out):
14 | assert len(theta) == (n_in + 1) * n_out
15 | self.W = theta[0 : n_in * n_out].reshape(n_in, n_out)
16 | self.b = theta[n_in * n_out : None].reshape(1, n_out)
17 | def act(self, ob):
18 | a = ob.dot(self.W) + self.b
19 | return a
20 |
--------------------------------------------------------------------------------
/gym/utils/json_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def json_encode_np(obj):
4 | """
5 | JSON can't serialize numpy types, so convert to pure python
6 | """
7 | if isinstance(obj, np.ndarray):
8 | return list(obj)
9 | elif isinstance(obj, np.float32):
10 | return float(obj)
11 | elif isinstance(obj, np.float64):
12 | return float(obj)
13 | elif isinstance(obj, np.int8):
14 | return int(obj)
15 | elif isinstance(obj, np.int16):
16 | return int(obj)
17 | elif isinstance(obj, np.int32):
18 | return int(obj)
19 | elif isinstance(obj, np.int64):
20 | return int(obj)
21 | else:
22 | return obj
23 |
--------------------------------------------------------------------------------
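
For context, `json_encode_np` above is the kind of fallback hook that the standard library's `json.dumps` accepts through its `default=` argument, which the encoder calls for any object it cannot serialize itself. A minimal usage sketch; the payload keys are only illustrative:

```
import json
import numpy as np

from gym.utils.json_utils import json_encode_np

payload = {
    'reward': np.float32(1.0),   # numpy scalar -> float
    'observation': np.zeros(3),  # ndarray -> list
    'steps': np.int64(7),        # numpy int -> int
}
print(json.dumps(payload, default=json_encode_np))
```
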
/CODE_OF_CONDUCT.rst:
--------------------------------------------------------------------------------
1 | OpenAI Gym is dedicated to providing a harassment-free experience for
2 | everyone, regardless of gender, gender identity and expression, sexual
3 | orientation, disability, physical appearance, body size, age, race, or
4 | religion. We do not tolerate harassment of participants in any form.
5 |
6 | This code of conduct applies to all OpenAI Gym spaces (including Gist
7 | comments) both online and off. Anyone who violates this code of
8 | conduct may be sanctioned or expelled from these spaces at the
9 | discretion of the OpenAI team.
10 |
11 | We may add additional rules over time, which will be made clearly
12 | available to participants. Participants are responsible for knowing
13 | and abiding by these rules.
14 |
--------------------------------------------------------------------------------
/gym/envs/toy_text/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.toy_text.blackjack import BlackjackEnv
2 | from gym.envs.toy_text.roulette import RouletteEnv
3 | from gym.envs.toy_text.frozen_lake import FrozenLakeEnv
4 | from gym.envs.toy_text.nchain import NChainEnv
5 | from gym.envs.toy_text.hotter_colder import HotterColder
6 | from gym.envs.toy_text.guessing_game import GuessingGame
7 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv
8 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipGeneralizedEnv
9 | from gym.envs.toy_text.cliffwalking import CliffWalkingEnv
10 | from gym.envs.toy_text.taxi import TaxiEnv
11 | from gym.envs.toy_text.guessing_game import GuessingGame
12 | from gym.envs.toy_text.hotter_colder import HotterColder
13 |
--------------------------------------------------------------------------------
/gym/envs/box2d/test_lunar_lander.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | try:
3 | import Box2D
4 | from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander
5 | except ImportError:
6 | Box2D = None
7 |
8 |
9 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed')
10 | def test_lunar_lander():
11 | _test_lander(LunarLander(), seed=0)
12 |
13 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed')
14 | def test_lunar_lander_continuous():
15 | _test_lander(LunarLanderContinuous(), seed=0)
16 |
17 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed')
18 | def _test_lander(env, seed=None, render=False):
19 | total_reward = demo_heuristic_lander(env, seed=seed, render=render)
20 | assert total_reward > 100
21 |
22 |
23 |
--------------------------------------------------------------------------------
/docs/readme.md:
--------------------------------------------------------------------------------
1 | # Table of Contents
2 |
3 | - [Environments](environments.md) lists Gym environments to run your algorithms against.
4 |
5 | - [Creating your own Environments](creating-environments.md) explains how to create your own Gym environments.
6 |
7 | - [Wrappers](wrappers.md) lists general-purpose wrappers for environments. These can perform pre/postprocessing on the data exchanged between the agent and the environment.
8 |
9 | - [Agents](agents.md) contains a listing of agents compatible with Gym environments. Agents facilitate the running of an algorithm against an environment.
10 |
11 | - [Miscellaneous](misc.md) is a collection of other value-add tools and utilities. These could be anything from a small convenience lib to a collection of video tutorials or a new language binding.
12 |
--------------------------------------------------------------------------------
/tests/gym/envs/robotics/hand/test_manipulate.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import unittest
3 |
4 | import pytest
5 |
6 | from gym import envs
7 | from gym.envs.tests.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE
8 |
9 |
10 | ENVIRONMENT_IDS = (
11 | 'HandManipulateEgg-v0',
12 | 'HandManipulatePen-v0',
13 | 'HandManipulateBlock-v0',
14 | )
15 |
16 |
17 | @pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE)
18 | @pytest.mark.parametrize("environment_id", ENVIRONMENT_IDS)
19 | def test_serialize_deserialize(environment_id):
20 | env1 = envs.make(environment_id, target_position='fixed')
21 | env1.reset()
22 | env2 = pickle.loads(pickle.dumps(env1))
23 |
24 | assert env1.target_position == env2.target_position, (
25 | env1.target_position, env2.target_position)
26 |
--------------------------------------------------------------------------------
/gym/wrappers/test_clip_action.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import gym
4 | from gym.wrappers import ClipAction
5 |
6 |
7 | def test_clip_action():
8 | # mountaincar: action-based rewards
9 | make_env = lambda: gym.make('MountainCarContinuous-v0')
10 | env = make_env()
11 | wrapped_env = ClipAction(make_env())
12 |
13 | seed = 0
14 | env.seed(seed)
15 | wrapped_env.seed(seed)
16 |
17 | env.reset()
18 | wrapped_env.reset()
19 |
20 | actions = [[.4], [1.2], [-0.3], [0.0], [-2.5]]
21 | for action in actions:
22 | obs1, r1, d1, _ = env.step(np.clip(action, env.action_space.low, env.action_space.high))
23 | obs2, r2, d2, _ = wrapped_env.step(action)
24 | assert np.allclose(r1, r2)
25 | assert np.allclose(obs1, obs2)
26 | assert d1 == d2
27 |
--------------------------------------------------------------------------------
/gym/envs/robotics/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.robotics.fetch_env import FetchEnv
2 | from gym.envs.robotics.fetch.slide import FetchSlideEnv
3 | from gym.envs.robotics.fetch.pick_and_place import FetchPickAndPlaceEnv
4 | from gym.envs.robotics.fetch.push import FetchPushEnv
5 | from gym.envs.robotics.fetch.reach import FetchReachEnv
6 |
7 | from gym.envs.robotics.hand.reach import HandReachEnv
8 | from gym.envs.robotics.hand.manipulate import HandBlockEnv
9 | from gym.envs.robotics.hand.manipulate import HandEggEnv
10 | from gym.envs.robotics.hand.manipulate import HandPenEnv
11 |
12 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandBlockTouchSensorsEnv
13 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandEggTouchSensorsEnv
14 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandPenTouchSensorsEnv
15 |
--------------------------------------------------------------------------------
/tests/gym/envs/robotics/hand/test_manipulate_touch_sensors.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | import pytest
4 |
5 | from gym import envs
6 | from gym.envs.tests.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE
7 |
8 |
9 | ENVIRONMENT_IDS = (
10 | 'HandManipulateEggTouchSensors-v1',
11 | 'HandManipulatePenTouchSensors-v0',
12 | 'HandManipulateBlockTouchSensors-v0',
13 | )
14 |
15 |
16 | @pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE)
17 | @pytest.mark.parametrize("environment_id", ENVIRONMENT_IDS)
18 | def test_serialize_deserialize(environment_id):
19 | env1 = envs.make(environment_id, target_position='fixed')
20 | env1.reset()
21 | env2 = pickle.loads(pickle.dumps(env1))
22 |
23 | assert env1.target_position == env2.target_position, (
24 | env1.target_position, env2.target_position)
25 |
--------------------------------------------------------------------------------
/gym/wrappers/transform_reward.py:
--------------------------------------------------------------------------------
1 | from gym import RewardWrapper
2 |
3 |
4 | class TransformReward(RewardWrapper):
5 | r"""Transform the reward via an arbitrary function.
6 |
7 | Example::
8 |
9 | >>> import gym
10 | >>> env = gym.make('CartPole-v1')
11 | >>> env = TransformReward(env, lambda r: 0.01*r)
12 | >>> env.reset()
13 | >>> observation, reward, done, info = env.step(env.action_space.sample())
14 | >>> reward
15 | 0.01
16 |
17 | Args:
18 | env (Env): environment
19 | f (callable): a function that transforms the reward
20 |
21 | """
22 | def __init__(self, env, f):
23 | super(TransformReward, self).__init__(env)
24 | assert callable(f)
25 | self.f = f
26 |
27 | def reward(self, reward):
28 | return self.f(reward)
29 |
--------------------------------------------------------------------------------
/gym/logger.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from gym.utils import colorize
4 |
5 | DEBUG = 10
6 | INFO = 20
7 | WARN = 30
8 | ERROR = 40
9 | DISABLED = 50
10 |
11 | MIN_LEVEL = 30
12 |
13 | def set_level(level):
14 | """
15 | Set logging threshold on current logger.
16 | """
17 | global MIN_LEVEL
18 | MIN_LEVEL = level
19 |
20 | def debug(msg, *args):
21 | if MIN_LEVEL <= DEBUG:
22 | print('%s: %s'%('DEBUG', msg % args))
23 |
24 | def info(msg, *args):
25 | if MIN_LEVEL <= INFO:
26 | print('%s: %s'%('INFO', msg % args))
27 |
28 | def warn(msg, *args):
29 | if MIN_LEVEL <= WARN:
30 | warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
31 |
32 | def error(msg, *args):
33 | if MIN_LEVEL <= ERROR:
34 | print(colorize('%s: %s'%('ERROR', msg % args), 'red'))
35 |
36 | # DEPRECATED:
37 | setLevel = set_level
38 |
--------------------------------------------------------------------------------
/gym/wrappers/transform_observation.py:
--------------------------------------------------------------------------------
1 | from gym import ObservationWrapper
2 |
3 |
4 | class TransformObservation(ObservationWrapper):
5 | r"""Transform the observation via an arbitrary function.
6 |
7 | Example::
8 |
9 | >>> import gym
10 | >>> env = gym.make('CartPole-v1')
11 | >>> env = TransformObservation(env, lambda obs: obs + 0.1*np.random.randn(*obs.shape))
12 | >>> env.reset()
13 | array([-0.08319338, 0.04635121, -0.07394746, 0.20877492])
14 |
15 | Args:
16 | env (Env): environment
17 | f (callable): a function that transforms the observation
18 |
19 | """
20 | def __init__(self, env, f):
21 | super(TransformObservation, self).__init__(env)
22 | assert callable(f)
23 | self.f = f
24 |
25 | def observation(self, observation):
26 | return self.f(observation)
27 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.mujoco.mujoco_env import MujocoEnv
2 | # ^^^^^ so that user gets the correct error
3 | # message if mujoco is not installed correctly
4 | from gym.envs.mujoco.ant import AntEnv
5 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
6 | from gym.envs.mujoco.hopper import HopperEnv
7 | from gym.envs.mujoco.walker2d import Walker2dEnv
8 | from gym.envs.mujoco.humanoid import HumanoidEnv
9 | from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
10 | from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
11 | from gym.envs.mujoco.reacher import ReacherEnv
12 | from gym.envs.mujoco.swimmer import SwimmerEnv
13 | from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv
14 | from gym.envs.mujoco.pusher import PusherEnv
15 | from gym.envs.mujoco.thrower import ThrowerEnv
16 | from gym.envs.mujoco.striker import StrikerEnv
17 |
--------------------------------------------------------------------------------
/gym/wrappers/__init__.py:
--------------------------------------------------------------------------------
1 | from gym import error
2 | from gym.wrappers.monitor import Monitor
3 | from gym.wrappers.time_limit import TimeLimit
4 | from gym.wrappers.filter_observation import FilterObservation
5 | from gym.wrappers.atari_preprocessing import AtariPreprocessing
6 | from gym.wrappers.rescale_action import RescaleAction
7 | from gym.wrappers.flatten_observation import FlattenObservation
8 | from gym.wrappers.gray_scale_observation import GrayScaleObservation
9 | from gym.wrappers.frame_stack import LazyFrames
10 | from gym.wrappers.frame_stack import FrameStack
11 | from gym.wrappers.transform_observation import TransformObservation
12 | from gym.wrappers.transform_reward import TransformReward
13 | from gym.wrappers.resize_observation import ResizeObservation
14 | from gym.wrappers.clip_action import ClipAction
15 | from gym.wrappers.record_episode_statistics import RecordEpisodeStatistics
16 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/reach.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'reach.xml')
8 |
9 |
10 | class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.4049,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | }
17 | fetch_env.FetchEnv.__init__(
18 | self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20,
19 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
20 | obj_range=0.15, target_range=0.15, distance_threshold=0.05,
21 | initial_qpos=initial_qpos, reward_type=reward_type)
22 | utils.EzPickle.__init__(self)
23 |
--------------------------------------------------------------------------------
/gym/wrappers/test_rescale_action.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import RescaleAction
7 |
8 |
9 | def test_rescale_action():
10 | env = gym.make('CartPole-v1')
11 | with pytest.raises(AssertionError):
12 | env = RescaleAction(env, -1, 1)
13 | del env
14 |
15 | env = gym.make('Pendulum-v0')
16 | wrapped_env = RescaleAction(gym.make('Pendulum-v0'), -1, 1)
17 |
18 | seed = 0
19 | env.seed(seed)
20 | wrapped_env.seed(seed)
21 |
22 | obs = env.reset()
23 | wrapped_obs = wrapped_env.reset()
24 | assert np.allclose(obs, wrapped_obs)
25 |
26 | obs, reward, _, _ = env.step([1.5])
27 | with pytest.raises(AssertionError):
28 | wrapped_env.step([1.5])
29 | wrapped_obs, wrapped_reward, _, _ = wrapped_env.step([0.75])
30 |
31 | assert np.allclose(obs, wrapped_obs)
32 | assert np.allclose(reward, wrapped_reward)
33 |
--------------------------------------------------------------------------------
/py.Dockerfile:
--------------------------------------------------------------------------------
1 | # A Dockerfile that sets up a full Gym install with test dependencies
2 | ARG PYTHON_VER
3 | FROM python:$PYTHON_VER
4 | RUN apt-get -y update && apt-get install -y unzip libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg
5 | RUN \
6 | # Download mujoco
7 | mkdir /root/.mujoco && \
8 | cd /root/.mujoco && \
9 | curl -O https://www.roboti.us/download/mjpro150_linux.zip && \
10 | unzip mjpro150_linux.zip
11 |
12 | ARG MUJOCO_KEY
13 | ARG PYTHON_VER
14 | ENV MUJOCO_KEY=$MUJOCO_KEY
15 |
16 | ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mjpro150/bin
17 | RUN echo $MUJOCO_KEY | base64 --decode > /root/.mujoco/mjkey.txt
18 | RUN pip install pytest pytest-forked lz4
19 |
20 | COPY . /usr/local/gym/
21 | WORKDIR /usr/local/gym/
22 | RUN [ "$PYTHON_VER" != "2.7" ] && pip install .[all] || pip install .
23 |
24 | ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"]
25 | CMD ["pytest","--forked"]
26 |
--------------------------------------------------------------------------------
/gym/spaces/multi_binary.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .space import Space
3 |
4 |
5 | class MultiBinary(Space):
6 | def __init__(self, n):
7 | self.n = n
8 | super(MultiBinary, self).__init__((self.n,), np.int8)
9 |
10 | def sample(self):
11 | return self.np_random.randint(low=0, high=2, size=self.n, dtype=self.dtype)
12 |
13 | def contains(self, x):
14 | if isinstance(x, list):
15 | x = np.array(x) # Promote list to array for contains check
16 | return ((x==0) | (x==1)).all()
17 |
18 | def to_jsonable(self, sample_n):
19 | return np.array(sample_n).tolist()
20 |
21 | def from_jsonable(self, sample_n):
22 | return [np.asarray(sample) for sample in sample_n]
23 |
24 | def __repr__(self):
25 | return "MultiBinary({})".format(self.n)
26 |
27 | def __eq__(self, other):
28 | return isinstance(other, MultiBinary) and self.n == other.n
29 |
--------------------------------------------------------------------------------
/gym/wrappers/test_record_episode_statistics.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import gym
4 | from gym.wrappers import RecordEpisodeStatistics
5 |
6 |
7 | @pytest.mark.parametrize('env_id', ['CartPole-v0', 'Pendulum-v0'])
8 | @pytest.mark.parametrize('deque_size', [2, 5])
9 | def test_record_episode_statistics(env_id, deque_size):
10 | env = gym.make(env_id)
11 | env = RecordEpisodeStatistics(env, deque_size)
12 |
13 | for n in range(5):
14 | env.reset()
15 | assert env.episode_return == 0.0
16 | assert env.episode_length == 0
17 | for t in range(env.spec.max_episode_steps):
18 | _, _, done, info = env.step(env.action_space.sample())
19 | if done:
20 | assert 'episode' in info
21 | assert all([item in info['episode'] for item in ['r', 'l', 't']])
22 | break
23 | assert len(env.return_queue) == deque_size
24 | assert len(env.length_queue) == deque_size
25 |
--------------------------------------------------------------------------------
/gym/wrappers/test_resize_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import gym
4 | from gym.wrappers import ResizeObservation
5 | try:
6 | import atari_py
7 | except ImportError:
8 | atari_py = None
9 |
10 |
11 | @pytest.mark.skipif(atari_py is None, reason='Only run this test when atari_py is installed')
12 | @pytest.mark.parametrize('env_id', ['PongNoFrameskip-v0', 'SpaceInvadersNoFrameskip-v0'])
13 | @pytest.mark.parametrize('shape', [16, 32, (8, 5), [10, 7]])
14 | def test_resize_observation(env_id, shape):
15 | env = gym.make(env_id)
16 | env = ResizeObservation(env, shape)
17 |
18 |
19 | assert env.observation_space.shape[-1] == 3
20 | obs = env.reset()
21 | if isinstance(shape, int):
22 | assert env.observation_space.shape[:2] == (shape, shape)
23 | assert obs.shape == (shape, shape, 3)
24 | else:
25 | assert env.observation_space.shape[:2] == tuple(shape)
26 | assert obs.shape == tuple(shape) + (3,)
27 |
--------------------------------------------------------------------------------
/gym/wrappers/test_transform_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import TransformObservation
7 |
8 |
9 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0'])
10 | def test_transform_observation(env_id):
11 | affine_transform = lambda x: 3*x + 2
12 | env = gym.make(env_id)
13 | wrapped_env = TransformObservation(gym.make(env_id), lambda obs: affine_transform(obs))
14 |
15 | env.seed(0)
16 | wrapped_env.seed(0)
17 |
18 | obs = env.reset()
19 | wrapped_obs = wrapped_env.reset()
20 | assert np.allclose(wrapped_obs, affine_transform(obs))
21 |
22 | action = env.action_space.sample()
23 | obs, reward, done, _ = env.step(action)
24 | wrapped_obs, wrapped_reward, wrapped_done, _ = wrapped_env.step(action)
25 | assert np.allclose(wrapped_obs, affine_transform(obs))
26 | assert np.allclose(wrapped_reward, reward)
27 | assert wrapped_done == done
28 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/push.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_env
4 |
5 |
6 | # Ensure we get the path separator correct on Windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'push.xml')
8 |
9 |
10 | class FetchPushEnv(fetch_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | fetch_env.FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0,
21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/wrappers/resize_observation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym.spaces import Box
4 | from gym import ObservationWrapper
5 |
6 |
7 | class ResizeObservation(ObservationWrapper):
8 | r"""Downsample the image observation to a square image. """
9 | def __init__(self, env, shape):
10 | super(ResizeObservation, self).__init__(env)
11 | if isinstance(shape, int):
12 | shape = (shape, shape)
13 | assert all(x > 0 for x in shape), shape
14 | self.shape = tuple(shape)
15 |
16 | obs_shape = self.shape + self.observation_space.shape[2:]
17 | self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8)
18 |
19 | def observation(self, observation):
20 | import cv2
21 | observation = cv2.resize(observation, self.shape[::-1], interpolation=cv2.INTER_AREA)
22 | if observation.ndim == 2:
23 | observation = np.expand_dims(observation, -1)
24 | return observation
25 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/duplicated_input.py:
--------------------------------------------------------------------------------
1 | """
2 | Task is to return every nth character from the input tape.
3 | http://arxiv.org/abs/1511.07275
4 | """
5 | from __future__ import division
6 | from gym.envs.algorithmic import algorithmic_env
7 |
8 |
9 | class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv):
10 | def __init__(self, duplication=2, base=5):
11 | self.duplication = duplication
12 | super(DuplicatedInputEnv, self).__init__(base=base, chars=True)
13 |
14 | def generate_input_data(self, size):
15 | res = []
16 | if size < self.duplication:
17 | size = self.duplication
18 | for i in range(size//self.duplication):
19 | char = self.np_random.randint(self.base)
20 | for _ in range(self.duplication):
21 | res.append(char)
22 | return res
23 |
24 | def target_from_input_data(self, input_data):
25 | return [input_data[i] for i in range(0, len(input_data), self.duplication)]
26 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/pick_and_place.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_env
4 |
5 |
6 | # Ensure we get the path separator correct on Windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'pick_and_place.xml')
8 |
9 |
10 | class FetchPickAndPlaceEnv(fetch_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | fetch_env.FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20,
20 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/spaces/discrete.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .space import Space
3 |
4 |
5 | class Discrete(Space):
6 |     r"""A discrete space in :math:`\{ 0, 1, \dots, n-1 \}`.
7 |
8 | Example::
9 |
10 | >>> Discrete(2)
11 |
12 | """
13 | def __init__(self, n):
14 | assert n >= 0
15 | self.n = n
16 | super(Discrete, self).__init__((), np.int64)
17 |
18 | def sample(self):
19 | return self.np_random.randint(self.n)
20 |
21 | def contains(self, x):
22 | if isinstance(x, int):
23 | as_int = x
24 | elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.kind in np.typecodes['AllInteger'] and x.shape == ()):
25 | as_int = int(x)
26 | else:
27 | return False
28 | return as_int >= 0 and as_int < self.n
29 |
30 | def __repr__(self):
31 | return "Discrete(%d)" % self.n
32 |
33 | def __eq__(self, other):
34 | return isinstance(other, Discrete) and self.n == other.n
35 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/slide.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 |
4 | from gym import utils
5 | from gym.envs.robotics import fetch_env
6 |
7 |
8 | # Ensure we get the path separator correct on Windows
9 | MODEL_XML_PATH = os.path.join('fetch', 'slide.xml')
10 |
11 |
12 | class FetchSlideEnv(fetch_env.FetchEnv, utils.EzPickle):
13 | def __init__(self, reward_type='sparse'):
14 | initial_qpos = {
15 | 'robot0:slide0': 0.05,
16 | 'robot0:slide1': 0.48,
17 | 'robot0:slide2': 0.0,
18 | 'object0:joint': [1.7, 1.1, 0.41, 1., 0., 0., 0.],
19 | }
20 | fetch_env.FetchEnv.__init__(
21 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
22 | gripper_extra_height=-0.02, target_in_the_air=False, target_offset=np.array([0.4, 0.0, 0.0]),
23 | obj_range=0.1, target_range=0.3, distance_threshold=0.05,
24 | initial_qpos=initial_qpos, reward_type=reward_type)
25 | utils.EzPickle.__init__(self)
26 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/reach.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/wrappers/time_limit.py:
--------------------------------------------------------------------------------
1 | import gym
2 |
3 |
4 | class TimeLimit(gym.Wrapper):
5 | def __init__(self, env, max_episode_steps=None):
6 | super(TimeLimit, self).__init__(env)
7 | if max_episode_steps is None and self.env.spec is not None:
8 | max_episode_steps = env.spec.max_episode_steps
9 | if self.env.spec is not None:
10 | self.env.spec.max_episode_steps = max_episode_steps
11 | self._max_episode_steps = max_episode_steps
12 | self._elapsed_steps = None
13 |
14 | def step(self, action):
15 | assert self._elapsed_steps is not None, "Cannot call env.step() before calling reset()"
16 | observation, reward, done, info = self.env.step(action)
17 | self._elapsed_steps += 1
18 | if self._elapsed_steps >= self._max_episode_steps:
19 | info['TimeLimit.truncated'] = not done
20 | done = True
21 | return observation, reward, done, info
22 |
23 | def reset(self, **kwargs):
24 | self._elapsed_steps = 0
25 | return self.env.reset(**kwargs)
26 |
--------------------------------------------------------------------------------
/gym/wrappers/gray_scale_observation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym.spaces import Box
4 | from gym import ObservationWrapper
5 |
6 |
7 | class GrayScaleObservation(ObservationWrapper):
8 | r"""Convert the image observation from RGB to gray scale. """
9 | def __init__(self, env, keep_dim=False):
10 | super(GrayScaleObservation, self).__init__(env)
11 | self.keep_dim = keep_dim
12 |
13 | assert len(env.observation_space.shape) == 3 and env.observation_space.shape[-1] == 3
14 | obs_shape = self.observation_space.shape[:2]
15 | if self.keep_dim:
16 | self.observation_space = Box(low=0, high=255, shape=(obs_shape[0], obs_shape[1], 1), dtype=np.uint8)
17 | else:
18 | self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8)
19 |
20 | def observation(self, observation):
21 | import cv2
22 | observation = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY)
23 | if self.keep_dim:
24 | observation = np.expand_dims(observation, -1)
25 | return observation
26 |
--------------------------------------------------------------------------------
/gym/utils/colorize.py:
--------------------------------------------------------------------------------
1 | """A set of common utilities used within the environments. These are
2 | not intended as API functions, and will not remain stable over time.
3 | """
4 |
5 | color2num = dict(
6 | gray=30,
7 | red=31,
8 | green=32,
9 | yellow=33,
10 | blue=34,
11 | magenta=35,
12 | cyan=36,
13 | white=37,
14 | crimson=38
15 | )
16 |
17 |
18 | def colorize(string, color, bold=False, highlight=False):
19 | """Return string surrounded by appropriate terminal color codes to
20 | print colorized text. Valid colors: gray, red, green, yellow,
21 | blue, magenta, cyan, white, crimson
22 | """
23 |
24 | # Import six here so that `utils` has no import-time dependencies.
25 | # We want this since we use `utils` during our import-time sanity checks
26 | # that verify that our dependencies (including six) are actually present.
27 | import six
28 |
29 | attr = []
30 | num = color2num[color]
31 | if highlight: num += 10
32 | attr.append(six.u(str(num)))
33 | if bold: attr.append(six.u('1'))
34 | attrs = six.u(';').join(attr)
35 | return six.u('\x1b[%sm%s\x1b[0m') % (attrs, string)
36 |
--------------------------------------------------------------------------------
/gym/utils/ezpickle.py:
--------------------------------------------------------------------------------
1 | class EzPickle(object):
2 | """Objects that are pickled and unpickled via their constructor
3 | arguments.
4 |
5 | Example usage:
6 |
7 | class Dog(Animal, EzPickle):
8 | def __init__(self, furcolor, tailkind="bushy"):
9 |             Animal.__init__(self)
10 |             EzPickle.__init__(self, furcolor, tailkind)
11 | ...
12 |
13 | When this object is unpickled, a new Dog will be constructed by passing the provided
14 | furcolor and tailkind into the constructor. However, philosophers are still not sure
15 | whether it is still the same dog.
16 |
17 | This is generally needed only for environments which wrap C/C++ code, such as MuJoCo
18 | and Atari.
19 | """
20 | def __init__(self, *args, **kwargs):
21 | self._ezpickle_args = args
22 | self._ezpickle_kwargs = kwargs
23 | def __getstate__(self):
24 | return {"_ezpickle_args" : self._ezpickle_args, "_ezpickle_kwargs": self._ezpickle_kwargs}
25 | def __setstate__(self, d):
26 | out = type(self)(*d["_ezpickle_args"], **d["_ezpickle_kwargs"])
27 | self.__dict__.update(out.__dict__)
28 |
--------------------------------------------------------------------------------
/gym/wrappers/README.md:
--------------------------------------------------------------------------------
1 | # Wrappers
2 |
3 | Wrappers are used to transform an environment in a modular way:
4 |
5 | ```python
6 | env = gym.make('Pong-v0')
7 | env = MyWrapper(env)
8 | ```
9 |
10 | Note that we may later restructure any of the files in this directory,
11 | but will keep the wrappers importable from the top level of the
12 | `gym.wrappers` package. So, for example, you should import `MyWrapper` as follows:
13 |
14 | ```python
15 | from gym.wrappers import MyWrapper
16 | ```
17 |
18 | ## Quick tips for writing your own wrapper
19 |
20 | - Don't forget to call `super(class_name, self).__init__(env)` if you override the wrapper's `__init__` function
21 | - You can access the inner environment with `self.unwrapped`
22 | - You can access the previous layer using `self.env`
23 | - The variables `metadata`, `action_space`, `observation_space`, `reward_range`, and `spec` are copied to `self` from the previous layer
24 | - Override at least one of the following: `__init__(self, env)`, `step`, `reset`, `render`, `close`, or `seed`
25 | - Your overridden method should take its input from the previous layer (`self.env`) and/or the inner layer (`self.unwrapped`), as in the sketch below
26 |
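27 | Putting these tips together, a minimal reward-scaling wrapper might look like
28 | the sketch below (the class name and the scale factor are purely illustrative,
29 | not part of Gym):
30 | 
31 | ```python
32 | import gym
33 | 
34 | 
35 | class ScaleReward(gym.Wrapper):
36 |     """Illustrative wrapper that multiplies every reward by a constant."""
37 |     def __init__(self, env, scale=0.1):
38 |         # Call the parent constructor since we override __init__
39 |         super(ScaleReward, self).__init__(env)
40 |         self.scale = scale
41 | 
42 |     def step(self, action):
43 |         # Take the transition from the previous layer (self.env) and rescale its reward
44 |         observation, reward, done, info = self.env.step(action)
45 |         return observation, reward * self.scale, done, info
46 | 
47 | 
48 | env = ScaleReward(gym.make('CartPole-v0'))
49 | ```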
--------------------------------------------------------------------------------
/gym/wrappers/test_flatten_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import FlattenObservation
7 | from gym import spaces
8 |
9 |
10 | @pytest.mark.parametrize('env_id', ['Blackjack-v0', 'KellyCoinflip-v0'])
11 | def test_flatten_observation(env_id):
12 | env = gym.make(env_id)
13 | wrapped_env = FlattenObservation(env)
14 |
15 | obs = env.reset()
16 | wrapped_obs = wrapped_env.reset()
17 |
18 | if env_id == 'Blackjack-v0':
19 | space = spaces.Tuple((
20 | spaces.Discrete(32),
21 | spaces.Discrete(11),
22 | spaces.Discrete(2)))
23 | wrapped_space = spaces.Box(-np.inf, np.inf,
24 | [32 + 11 + 2], dtype=np.float32)
25 | elif env_id == 'KellyCoinflip-v0':
26 | space = spaces.Tuple((
27 | spaces.Box(0, 250.0, [1], dtype=np.float32),
28 | spaces.Discrete(300 + 1)))
29 | wrapped_space = spaces.Box(-np.inf, np.inf,
30 | [1 + (300 + 1)], dtype=np.float32)
31 |
32 | assert space.contains(obs)
33 | assert wrapped_space.contains(wrapped_obs)
34 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/inverted_pendulum.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | utils.EzPickle.__init__(self)
8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
9 |
10 | def step(self, a):
11 | reward = 1.0
12 | self.do_simulation(a, self.frame_skip)
13 | ob = self._get_obs()
14 | notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2)
15 | done = not notdone
16 | return ob, reward, done, {}
17 |
18 | def reset_model(self):
19 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
20 | qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
21 | self.set_state(qpos, qvel)
22 | return self._get_obs()
23 |
24 | def _get_obs(self):
25 | return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
26 |
27 | def viewer_setup(self):
28 | v = self.viewer
29 | v.cam.trackbodyid = 0
30 | v.cam.distance = self.model.stat.extent
31 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/swimmer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4)
8 | utils.EzPickle.__init__(self)
9 |
10 | def step(self, a):
11 | ctrl_cost_coeff = 0.0001
12 | xposbefore = self.sim.data.qpos[0]
13 | self.do_simulation(a, self.frame_skip)
14 | xposafter = self.sim.data.qpos[0]
15 | reward_fwd = (xposafter - xposbefore) / self.dt
16 | reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
17 | reward = reward_fwd + reward_ctrl
18 | ob = self._get_obs()
19 | return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
20 |
21 | def _get_obs(self):
22 | qpos = self.sim.data.qpos
23 | qvel = self.sim.data.qvel
24 | return np.concatenate([qpos.flat[2:], qvel.flat])
25 |
26 | def reset_model(self):
27 | self.set_state(
28 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
29 | self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv)
30 | )
31 | return self._get_obs()
32 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/reversed_addition.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from gym.envs.algorithmic import algorithmic_env
3 |
4 |
5 | class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
6 | def __init__(self, rows=2, base=3):
7 | super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False)
8 |
9 | def target_from_input_data(self, input_strings):
10 | curry = 0
11 | target = []
12 | for digits in input_strings:
13 | total = sum(digits) + curry
14 | target.append(total % self.base)
15 | curry = total // self.base
16 |
17 | if curry > 0:
18 | target.append(curry)
19 | return target
20 |
21 | @property
22 | def time_limit(self):
23 | # Quirk preserved for the sake of consistency: add the length of the input
24 | # rather than the length of the desired output (which may differ if there's
25 | # an extra carried digit).
26 | # TODO: It seems like this time limit is so strict as to make Addition3-v0
27 | # unsolvable, since agents aren't even given enough time steps to look at
28 | # all the digits. (The solutions on the scoreboard seem to only work by
29 | # save-scumming.)
30 | return self.input_width*2 + 4
31 |
--------------------------------------------------------------------------------
/gym/wrappers/test_frame_stack.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | pytest.importorskip("atari_py")
3 |
4 | import numpy as np
5 | import gym
6 | from gym.wrappers import FrameStack
7 | try:
8 | import lz4
9 | except ImportError:
10 | lz4 = None
11 |
12 |
13 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0', 'Pong-v0'])
14 | @pytest.mark.parametrize('num_stack', [2, 3, 4])
15 | @pytest.mark.parametrize('lz4_compress', [
16 | pytest.param(True, marks=pytest.mark.skipif(lz4 is None, reason="Need lz4 to run tests with compression")),
17 | False
18 | ])
19 | def test_frame_stack(env_id, num_stack, lz4_compress):
20 | env = gym.make(env_id)
21 | shape = env.observation_space.shape
22 | env = FrameStack(env, num_stack, lz4_compress)
23 | assert env.observation_space.shape == (num_stack,) + shape
24 |
25 | obs = env.reset()
26 | obs = np.asarray(obs)
27 | assert obs.shape == (num_stack,) + shape
28 | for i in range(1, num_stack):
29 | assert np.allclose(obs[i - 1], obs[i])
30 |
31 | obs, _, _, _ = env.step(env.action_space.sample())
32 | obs = np.asarray(obs)
33 | assert obs.shape == (num_stack,) + shape
34 | for i in range(1, num_stack - 1):
35 | assert np.allclose(obs[i - 1], obs[i])
36 | assert not np.allclose(obs[-1], obs[-2])
37 |
--------------------------------------------------------------------------------
/gym/vector/utils/misc.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import os
3 |
4 | __all__ = ['CloudpickleWrapper', 'clear_mpi_env_vars']
5 |
6 | class CloudpickleWrapper(object):
7 | def __init__(self, fn):
8 | self.fn = fn
9 |
10 | def __getstate__(self):
11 | import cloudpickle
12 | return cloudpickle.dumps(self.fn)
13 |
14 | def __setstate__(self, ob):
15 | import pickle
16 | self.fn = pickle.loads(ob)
17 |
18 | def __call__(self):
19 | return self.fn()
20 |
21 | @contextlib.contextmanager
22 | def clear_mpi_env_vars():
23 | """
24 | `from mpi4py import MPI` will call `MPI_Init` by default. If the child
25 | process has MPI environment variables, MPI will think that the child process
26 | is an MPI process just like the parent and do bad things such as hang.
27 |
28 | This context manager is a hacky way to clear those environment variables
29 | temporarily such as when we are starting multiprocessing Processes.
30 | """
31 | removed_environment = {}
32 | for k, v in list(os.environ.items()):
33 | for prefix in ['OMPI_', 'PMI_']:
34 | if k.startswith(prefix):
35 | removed_environment[k] = v
36 | del os.environ[k]
37 | try:
38 | yield
39 | finally:
40 | os.environ.update(removed_environment)
41 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/half_cheetah.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5)
8 | utils.EzPickle.__init__(self)
9 |
10 | def step(self, action):
11 | xposbefore = self.sim.data.qpos[0]
12 | self.do_simulation(action, self.frame_skip)
13 | xposafter = self.sim.data.qpos[0]
14 | ob = self._get_obs()
15 | reward_ctrl = - 0.1 * np.square(action).sum()
16 | reward_run = (xposafter - xposbefore)/self.dt
17 | reward = reward_ctrl + reward_run
18 | done = False
19 | return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
20 |
21 | def _get_obs(self):
22 | return np.concatenate([
23 | self.sim.data.qpos.flat[1:],
24 | self.sim.data.qvel.flat,
25 | ])
26 |
27 | def reset_model(self):
28 | qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
29 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
30 | self.set_state(qpos, qvel)
31 | return self._get_obs()
32 |
33 | def viewer_setup(self):
34 | self.viewer.cam.distance = self.model.stat.extent * 0.5
35 |
--------------------------------------------------------------------------------
/gym/wrappers/rescale_action.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import gym
4 | from gym import spaces
5 |
6 |
7 | class RescaleAction(gym.ActionWrapper):
8 | r"""Rescales the continuous action space of the environment to a range [a,b].
9 |
10 | Example::
11 |
12 | >>> RescaleAction(env, a, b).action_space == Box(a,b)
13 | True
14 |
15 | """
16 | def __init__(self, env, a, b):
17 | assert isinstance(env.action_space, spaces.Box), (
18 | "expected Box action space, got {}".format(type(env.action_space)))
19 | assert np.less_equal(a, b).all(), (a, b)
20 | super(RescaleAction, self).__init__(env)
21 | self.a = np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + a
22 | self.b = np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + b
23 | self.action_space = spaces.Box(low=a, high=b, shape=env.action_space.shape, dtype=env.action_space.dtype)
24 |
25 | def action(self, action):
26 | assert np.all(np.greater_equal(action, self.a)), (action, self.a)
27 | assert np.all(np.less_equal(action, self.b)), (action, self.b)
28 | low = self.env.action_space.low
29 | high = self.env.action_space.high
30 | action = low + (high - low)*((action - self.a)/(self.b - self.a))
31 | action = np.clip(action, low, high)
32 | return action
33 |
--------------------------------------------------------------------------------
/gym/envs/tests/test_frozenlake_dfs.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from gym.envs.toy_text.frozen_lake import generate_random_map
5 |
6 | # Test that FrozenLake map generation creates valid maps of various sizes.
7 | def test_frozenlake_dfs_map_generation():
8 |
9 | def frozenlake_dfs_path_exists(res):
10 | frontier, discovered = [], set()
11 | frontier.append((0,0))
12 | while frontier:
13 | r, c = frontier.pop()
14 | if not (r,c) in discovered:
15 | discovered.add((r,c))
16 | directions = [(1, 0), (0, 1), (-1, 0), (0, -1)]
17 | for x, y in directions:
18 | r_new = r + x
19 | c_new = c + y
20 | if r_new < 0 or r_new >= size or c_new < 0 or c_new >= size:
21 | continue
22 | if res[r_new][c_new] == 'G':
23 | return True
24 | if (res[r_new][c_new] not in '#H'):
25 | frontier.append((r_new, c_new))
26 | return False
27 |
28 | map_sizes = [5, 10, 200]
29 | for size in map_sizes:
30 | new_frozenlake = generate_random_map(size)
31 | assert len(new_frozenlake) == size
32 | assert len(new_frozenlake[0]) == size
33 | assert frozenlake_dfs_path_exists(new_frozenlake)
34 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/push.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/wrappers/test_gray_scale_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import GrayScaleObservation
7 | from gym.wrappers import AtariPreprocessing
8 | pytest.importorskip('atari_py')
9 | pytest.importorskip('cv2')
10 |
11 | @pytest.mark.parametrize('env_id', ['PongNoFrameskip-v0', 'SpaceInvadersNoFrameskip-v0'])
12 | @pytest.mark.parametrize('keep_dim', [True, False])
13 | def test_gray_scale_observation(env_id, keep_dim):
14 | gray_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=True)
15 | rgb_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=False)
16 | wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim)
17 | assert rgb_env.observation_space.shape[-1] == 3
18 |
19 | seed = 0
20 | gray_env.seed(seed)
21 | wrapped_env.seed(seed)
22 |
23 | gray_obs = gray_env.reset()
24 | wrapped_obs = wrapped_env.reset()
25 |
26 | if keep_dim:
27 | assert wrapped_env.observation_space.shape[-1] == 1
28 | assert len(wrapped_obs.shape) == 3
29 | wrapped_obs = wrapped_obs.squeeze(-1)
30 | else:
31 | assert len(wrapped_env.observation_space.shape) == 2
32 | assert len(wrapped_obs.shape) == 2
33 |
34 | # ALE gray scale is slightly different, but no more than by one shade
35 | assert np.allclose(gray_obs.astype('int32'), wrapped_obs.astype('int32'), atol=1)
36 |
--------------------------------------------------------------------------------
/gym/wrappers/record_episode_statistics.py:
--------------------------------------------------------------------------------
1 | import time
2 | from collections import deque
3 |
4 | import gym
5 |
6 |
7 | class RecordEpisodeStatistics(gym.Wrapper):
8 | def __init__(self, env, deque_size=100):
9 | super(RecordEpisodeStatistics, self).__init__(env)
10 | self.t0 = time.time() # TODO: use perf_counter when gym removes Python 2 support
11 | self.episode_return = 0.0
12 | self.episode_length = 0
13 | self.return_queue = deque(maxlen=deque_size)
14 | self.length_queue = deque(maxlen=deque_size)
15 |
16 | def reset(self, **kwargs):
17 | observation = super(RecordEpisodeStatistics, self).reset(**kwargs)
18 | self.episode_return = 0.0
19 | self.episode_length = 0
20 | return observation
21 |
22 | def step(self, action):
23 | observation, reward, done, info = super(RecordEpisodeStatistics, self).step(action)
24 | self.episode_return += reward
25 | self.episode_length += 1
26 | if done:
27 | info['episode'] = {'r': self.episode_return,
28 | 'l': self.episode_length,
29 | 't': round(time.time() - self.t0, 6)}
30 | self.return_queue.append(self.episode_return)
31 | self.length_queue.append(self.episode_length)
32 | self.episode_return = 0.0
33 | self.episode_length = 0
34 | return observation, reward, done, info
35 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/inverted_pendulum.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/slide.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/tests/spec_list.py:
--------------------------------------------------------------------------------
1 | from gym import envs, logger
2 | import os
3 |
4 |
5 | SKIP_MUJOCO_WARNING_MESSAGE = (
6 |     "Cannot run mujoco test (either license key not found or mujoco not "
7 | "installed properly).")
8 |
9 |
10 | skip_mujoco = not (os.environ.get('MUJOCO_KEY'))
11 | if not skip_mujoco:
12 | try:
13 | import mujoco_py
14 | except ImportError:
15 | skip_mujoco = True
16 |
17 | def should_skip_env_spec_for_tests(spec):
18 | # We skip tests for envs that require dependencies or are otherwise
19 | # troublesome to run frequently
20 | ep = spec.entry_point
21 | # Skip mujoco tests for pull request CI
22 | if skip_mujoco and (ep.startswith('gym.envs.mujoco') or ep.startswith('gym.envs.robotics:')):
23 | return True
24 | try:
25 | import atari_py
26 | except ImportError:
27 | if ep.startswith('gym.envs.atari'):
28 | return True
29 | try:
30 | import Box2D
31 | except ImportError:
32 | if ep.startswith('gym.envs.box2d'):
33 | return True
34 |
35 | if ( 'GoEnv' in ep or
36 | 'HexEnv' in ep or
37 | (ep.startswith("gym.envs.atari") and not spec.id.startswith("Pong") and not spec.id.startswith("Seaquest"))
38 | ):
39 | logger.warn("Skipping tests for env {}".format(ep))
40 | return True
41 | return False
42 |
43 | spec_list = [spec for spec in sorted(envs.registry.all(), key=lambda x: x.id) if spec.entry_point is not None and not should_skip_env_spec_for_tests(spec)]
44 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/walker2d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 |
7 | def __init__(self):
8 | mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4)
9 | utils.EzPickle.__init__(self)
10 |
11 | def step(self, a):
12 | posbefore = self.sim.data.qpos[0]
13 | self.do_simulation(a, self.frame_skip)
14 | posafter, height, ang = self.sim.data.qpos[0:3]
15 | alive_bonus = 1.0
16 | reward = ((posafter - posbefore) / self.dt)
17 | reward += alive_bonus
18 | reward -= 1e-3 * np.square(a).sum()
19 | done = not (height > 0.8 and height < 2.0 and
20 | ang > -1.0 and ang < 1.0)
21 | ob = self._get_obs()
22 | return ob, reward, done, {}
23 |
24 | def _get_obs(self):
25 | qpos = self.sim.data.qpos
26 | qvel = self.sim.data.qvel
27 | return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()
28 |
29 | def reset_model(self):
30 | self.set_state(
31 | self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq),
32 | self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
33 | )
34 | return self._get_obs()
35 |
36 | def viewer_setup(self):
37 | self.viewer.cam.trackbodyid = 2
38 | self.viewer.cam.distance = self.model.stat.extent * 0.5
39 | self.viewer.cam.lookat[2] = 1.15
40 | self.viewer.cam.elevation = -20
41 |
--------------------------------------------------------------------------------
/gym/envs/toy_text/roulette.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import spaces
3 | from gym.utils import seeding
4 |
5 |
6 | class RouletteEnv(gym.Env):
7 | """Simple roulette environment
8 |
9 | The roulette wheel has 37 spots. If the bet is 0 and a 0 comes up,
10 | you win a reward of 35. If the parity of your bet matches the parity
11 | of the spin, you win 1. Otherwise you receive a reward of -1.
12 |
13 |     The long run reward for playing 0 should be -1/37 for any state: a win of 35 comes up with probability 1/37 and a loss of 1 otherwise, so the expected reward is 35/37 - 36/37 = -1/37.
14 |
15 | The last action (38) stops the rollout for a return of 0 (walking away)
16 | """
17 | def __init__(self, spots=37):
18 | self.n = spots + 1
19 | self.action_space = spaces.Discrete(self.n)
20 | self.observation_space = spaces.Discrete(1)
21 | self.seed()
22 |
23 | def seed(self, seed=None):
24 | self.np_random, seed = seeding.np_random(seed)
25 | return [seed]
26 |
27 | def step(self, action):
28 | assert self.action_space.contains(action)
29 | if action == self.n - 1:
30 | # observation, reward, done, info
31 | return 0, 0, True, {}
32 |
33 | # N.B. np.random.randint draws from [A, B) while random.randint draws from [A,B]
34 | val = self.np_random.randint(0, self.n - 1)
35 | if val == action == 0:
36 | reward = self.n - 2.0
37 | elif val != 0 and action != 0 and val % 2 == action % 2:
38 | reward = 1.0
39 | else:
40 | reward = -1.0
41 | return 0, reward, False, {}
42 |
43 | def reset(self):
44 | return 0
45 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # gym
2 |
3 | The MIT License
4 |
5 | Copyright (c) 2016 OpenAI (https://openai.com)
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy
8 | of this software and associated documentation files (the "Software"), to deal
9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be included in
15 | all copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | THE SOFTWARE.
24 |
25 | # Mujoco models
26 | This work is derived from [MuJoCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license:
27 | ```
28 | This file is part of MuJoCo.
29 | Copyright 2009-2015 Roboti LLC.
30 | Mujoco :: Advanced physics simulation engine
31 | Source : www.roboti.us
32 | Version : 1.31
33 | Released : 23Apr16
34 | Author :: Vikash Kumar
35 | Contacts : kumar@roboti.us
36 | ```
37 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/hopper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4)
8 | utils.EzPickle.__init__(self)
9 |
10 | def step(self, a):
11 | posbefore = self.sim.data.qpos[0]
12 | self.do_simulation(a, self.frame_skip)
13 | posafter, height, ang = self.sim.data.qpos[0:3]
14 | alive_bonus = 1.0
15 | reward = (posafter - posbefore) / self.dt
16 | reward += alive_bonus
17 | reward -= 1e-3 * np.square(a).sum()
18 | s = self.state_vector()
19 | done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and
20 | (height > .7) and (abs(ang) < .2))
21 | ob = self._get_obs()
22 | return ob, reward, done, {}
23 |
24 | def _get_obs(self):
25 | return np.concatenate([
26 | self.sim.data.qpos.flat[1:],
27 | np.clip(self.sim.data.qvel.flat, -10, 10)
28 | ])
29 |
30 | def reset_model(self):
31 | qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq)
32 | qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
33 | self.set_state(qpos, qvel)
34 | return self._get_obs()
35 |
36 | def viewer_setup(self):
37 | self.viewer.cam.trackbodyid = 2
38 | self.viewer.cam.distance = self.model.stat.extent * 0.75
39 | self.viewer.cam.lookat[2] = 1.15
40 | self.viewer.cam.elevation = -20
41 |
--------------------------------------------------------------------------------
/gym/spaces/space.py:
--------------------------------------------------------------------------------
1 | from gym.utils import seeding
2 |
3 |
4 | class Space(object):
5 | """Defines the observation and action spaces, so you can write generic
6 | code that applies to any Env. For example, you can choose a random
7 | action.
8 | """
9 | def __init__(self, shape=None, dtype=None):
10 | import numpy as np # takes about 300-400ms to import, so we load lazily
11 | self.shape = None if shape is None else tuple(shape)
12 | self.dtype = None if dtype is None else np.dtype(dtype)
13 | self.np_random = None
14 | self.seed()
15 |
16 | def sample(self):
17 | """Randomly sample an element of this space. Can be
18 | uniform or non-uniform sampling based on boundedness of space."""
19 | raise NotImplementedError
20 |
21 | def seed(self, seed=None):
22 | """Seed the PRNG of this space. """
23 | self.np_random, seed = seeding.np_random(seed)
24 | return [seed]
25 |
26 | def contains(self, x):
27 | """
28 | Return boolean specifying if x is a valid
29 | member of this space
30 | """
31 | raise NotImplementedError
32 |
33 | def __contains__(self, x):
34 | return self.contains(x)
35 |
36 | def to_jsonable(self, sample_n):
37 | """Convert a batch of samples from this space to a JSONable data type."""
38 | # By default, assume identity is JSONable
39 | return sample_n
40 |
41 | def from_jsonable(self, sample_n):
42 | """Convert a JSONable data type to a batch of samples from this space."""
43 | # By default, assume identity is JSONable
44 | return sample_n
45 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/inverted_double_pendulum.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 |
7 | def __init__(self):
8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5)
9 | utils.EzPickle.__init__(self)
10 |
11 | def step(self, action):
12 | self.do_simulation(action, self.frame_skip)
13 | ob = self._get_obs()
14 | x, _, y = self.sim.data.site_xpos[0]
15 | dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
16 | v1, v2 = self.sim.data.qvel[1:3]
17 | vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
18 | alive_bonus = 10
19 | r = alive_bonus - dist_penalty - vel_penalty
20 | done = bool(y <= 1)
21 | return ob, r, done, {}
22 |
23 | def _get_obs(self):
24 | return np.concatenate([
25 | self.sim.data.qpos[:1], # cart x pos
26 | np.sin(self.sim.data.qpos[1:]), # link angles
27 | np.cos(self.sim.data.qpos[1:]),
28 | np.clip(self.sim.data.qvel, -10, 10),
29 | np.clip(self.sim.data.qfrc_constraint, -10, 10)
30 | ]).ravel()
31 |
32 | def reset_model(self):
33 | self.set_state(
34 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
35 | self.init_qvel + self.np_random.randn(self.model.nv) * .1
36 | )
37 | return self._get_obs()
38 |
39 | def viewer_setup(self):
40 | v = self.viewer
41 | v.cam.trackbodyid = 0
42 | v.cam.distance = self.model.stat.extent * 0.5
43 | v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2]
44 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/pick_and_place.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/mujoco/reacher.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | utils.EzPickle.__init__(self)
8 | mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2)
9 |
10 | def step(self, a):
11 | vec = self.get_body_com("fingertip")-self.get_body_com("target")
12 | reward_dist = - np.linalg.norm(vec)
13 | reward_ctrl = - np.square(a).sum()
14 | reward = reward_dist + reward_ctrl
15 | self.do_simulation(a, self.frame_skip)
16 | ob = self._get_obs()
17 | done = False
18 | return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
19 |
20 | def viewer_setup(self):
21 | self.viewer.cam.trackbodyid = 0
22 |
23 | def reset_model(self):
24 | qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
25 | while True:
26 | self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
27 | if np.linalg.norm(self.goal) < 0.2:
28 | break
29 | qpos[-2:] = self.goal
30 | qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
31 | qvel[-2:] = 0
32 | self.set_state(qpos, qvel)
33 | return self._get_obs()
34 |
35 | def _get_obs(self):
36 | theta = self.sim.data.qpos.flat[:2]
37 | return np.concatenate([
38 | np.cos(theta),
39 | np.sin(theta),
40 | self.sim.data.qpos.flat[2:],
41 | self.sim.data.qvel.flat[:2],
42 | self.get_body_com("fingertip") - self.get_body_com("target")
43 | ])
44 |
--------------------------------------------------------------------------------
/gym/wrappers/monitoring/tests/test_video_recorder.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import shutil
4 | import tempfile
5 | import numpy as np
6 |
7 | import gym
8 | from gym.wrappers.monitoring.video_recorder import VideoRecorder
9 |
10 | class BrokenRecordableEnv(object):
11 | metadata = {'render.modes': [None, 'rgb_array']}
12 |
13 | def render(self, mode=None):
14 | pass
15 |
16 | class UnrecordableEnv(object):
17 | metadata = {'render.modes': [None]}
18 |
19 | def render(self, mode=None):
20 | pass
21 |
22 | def test_record_simple():
23 | env = gym.make("CartPole-v1")
24 | rec = VideoRecorder(env)
25 | env.reset()
26 | rec.capture_frame()
27 | rec.close()
28 | assert not rec.empty
29 | assert not rec.broken
30 | assert os.path.exists(rec.path)
31 | f = open(rec.path)
32 | assert os.fstat(f.fileno()).st_size > 100
33 |
34 | def test_no_frames():
35 | env = BrokenRecordableEnv()
36 | rec = VideoRecorder(env)
37 | rec.close()
38 | assert rec.empty
39 | assert rec.functional
40 | assert not os.path.exists(rec.path)
41 |
42 | def test_record_unrecordable_method():
43 | env = UnrecordableEnv()
44 | rec = VideoRecorder(env)
45 | assert not rec.enabled
46 | rec.close()
47 |
48 | def test_record_breaking_render_method():
49 | env = BrokenRecordableEnv()
50 | rec = VideoRecorder(env)
51 | rec.capture_frame()
52 | rec.close()
53 | assert rec.empty
54 | assert rec.broken
55 | assert not os.path.exists(rec.path)
56 |
57 | def test_text_envs():
58 | env = gym.make('FrozenLake-v0')
59 | video = VideoRecorder(env)
60 | try:
61 | env.reset()
62 | video.capture_frame()
63 | video.close()
64 | finally:
65 | os.remove(video.path)
66 |
--------------------------------------------------------------------------------
/gym/vector/tests/test_vector_env.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from gym.vector.tests.utils import make_env
5 |
6 | from gym.vector.async_vector_env import AsyncVectorEnv
7 | from gym.vector.sync_vector_env import SyncVectorEnv
8 |
9 | @pytest.mark.parametrize('shared_memory', [True, False])
10 | def test_vector_env_equal(shared_memory):
11 | env_fns = [make_env('CubeCrash-v0', i) for i in range(4)]
12 | num_steps = 100
13 | try:
14 | async_env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
15 | sync_env = SyncVectorEnv(env_fns)
16 |
17 | async_env.seed(0)
18 | sync_env.seed(0)
19 |
20 | assert async_env.num_envs == sync_env.num_envs
21 | assert async_env.observation_space == sync_env.observation_space
22 | assert async_env.single_observation_space == sync_env.single_observation_space
23 | assert async_env.action_space == sync_env.action_space
24 | assert async_env.single_action_space == sync_env.single_action_space
25 |
26 | async_observations = async_env.reset()
27 | sync_observations = sync_env.reset()
28 | assert np.all(async_observations == sync_observations)
29 |
30 | for _ in range(num_steps):
31 | actions = async_env.action_space.sample()
32 | assert actions in sync_env.action_space
33 |
34 | async_observations, async_rewards, async_dones, _ = async_env.step(actions)
35 | sync_observations, sync_rewards, sync_dones, _ = sync_env.step(actions)
36 |
37 | assert np.all(async_observations == sync_observations)
38 | assert np.all(async_rewards == sync_rewards)
39 | assert np.all(async_dones == sync_dones)
40 |
41 | finally:
42 | async_env.close()
43 | sync_env.close()
44 |
--------------------------------------------------------------------------------
/gym/envs/toy_text/discrete.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym import Env, spaces
4 | from gym.utils import seeding
5 |
6 | def categorical_sample(prob_n, np_random):
7 | """
8 | Sample from categorical distribution
9 | Each row specifies class probabilities
10 | """
11 | prob_n = np.asarray(prob_n)
12 | csprob_n = np.cumsum(prob_n)
13 | return (csprob_n > np_random.rand()).argmax()
14 |
15 |
16 | class DiscreteEnv(Env):
17 |
18 | """
19 | Has the following members
20 | - nS: number of states
21 | - nA: number of actions
22 | - P: transitions (*)
23 | - isd: initial state distribution (**)
24 |
25 |     (*) dict of dicts of lists, where
26 | P[s][a] == [(probability, nextstate, reward, done), ...]
27 | (**) list or array of length nS
28 |
29 |
30 | """
31 | def __init__(self, nS, nA, P, isd):
32 | self.P = P
33 | self.isd = isd
34 | self.lastaction = None # for rendering
35 | self.nS = nS
36 | self.nA = nA
37 |
38 | self.action_space = spaces.Discrete(self.nA)
39 | self.observation_space = spaces.Discrete(self.nS)
40 |
41 | self.seed()
42 | self.s = categorical_sample(self.isd, self.np_random)
43 |
44 | def seed(self, seed=None):
45 | self.np_random, seed = seeding.np_random(seed)
46 | return [seed]
47 |
48 | def reset(self):
49 | self.s = categorical_sample(self.isd, self.np_random)
50 | self.lastaction = None
51 | return self.s
52 |
53 | def step(self, a):
54 | transitions = self.P[self.s][a]
55 | i = categorical_sample([t[0] for t in transitions], self.np_random)
56 |         p, s, r, d = transitions[i]
57 | self.s = s
58 | self.lastaction = a
59 | return (s, r, d, {"prob" : p})
60 |
--------------------------------------------------------------------------------
/gym/spaces/tuple.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .space import Space
3 |
4 |
5 | class Tuple(Space):
6 | """
7 | A tuple (i.e., product) of simpler spaces
8 |
9 | Example usage:
10 | self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
11 | """
12 | def __init__(self, spaces):
13 | self.spaces = spaces
14 | for space in spaces:
15 | assert isinstance(space, Space), "Elements of the tuple must be instances of gym.Space"
16 | super(Tuple, self).__init__(None, None)
17 |
18 | def seed(self, seed=None):
19 | [space.seed(seed) for space in self.spaces]
20 |
21 | def sample(self):
22 | return tuple([space.sample() for space in self.spaces])
23 |
24 | def contains(self, x):
25 | if isinstance(x, list):
26 | x = tuple(x) # Promote list to tuple for contains check
27 | return isinstance(x, tuple) and len(x) == len(self.spaces) and all(
28 | space.contains(part) for (space,part) in zip(self.spaces,x))
29 |
30 | def __repr__(self):
31 |         return "Tuple(" + ", ".join([str(s) for s in self.spaces]) + ")"
32 |
33 | def to_jsonable(self, sample_n):
34 | # serialize as list-repr of tuple of vectors
35 | return [space.to_jsonable([sample[i] for sample in sample_n]) \
36 | for i, space in enumerate(self.spaces)]
37 |
38 | def from_jsonable(self, sample_n):
39 | return [sample for sample in zip(*[space.from_jsonable(sample_n[i]) for i, space in enumerate(self.spaces)])]
40 |
41 | def __getitem__(self, index):
42 | return self.spaces[index]
43 |
44 | def __len__(self):
45 | return len(self.spaces)
46 |
47 | def __eq__(self, other):
48 | return isinstance(other, Tuple) and self.spaces == other.spaces
49 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/ant.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
8 | utils.EzPickle.__init__(self)
9 |
10 | def step(self, a):
11 | xposbefore = self.get_body_com("torso")[0]
12 | self.do_simulation(a, self.frame_skip)
13 | xposafter = self.get_body_com("torso")[0]
14 | forward_reward = (xposafter - xposbefore)/self.dt
15 | ctrl_cost = .5 * np.square(a).sum()
16 | contact_cost = 0.5 * 1e-3 * np.sum(
17 | np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
18 | survive_reward = 1.0
19 | reward = forward_reward - ctrl_cost - contact_cost + survive_reward
20 | state = self.state_vector()
21 | notdone = np.isfinite(state).all() \
22 | and state[2] >= 0.2 and state[2] <= 1.0
23 | done = not notdone
24 | ob = self._get_obs()
25 | return ob, reward, done, dict(
26 | reward_forward=forward_reward,
27 | reward_ctrl=-ctrl_cost,
28 | reward_contact=-contact_cost,
29 | reward_survive=survive_reward)
30 |
31 | def _get_obs(self):
32 | return np.concatenate([
33 | self.sim.data.qpos.flat[2:],
34 | self.sim.data.qvel.flat,
35 | np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
36 | ])
37 |
38 | def reset_model(self):
39 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1)
40 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
41 | self.set_state(qpos, qvel)
42 | return self._get_obs()
43 |
44 | def viewer_setup(self):
45 | self.viewer.cam.distance = self.model.stat.extent * 0.5
46 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/reach.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/point.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/docs/creating-environments.md:
--------------------------------------------------------------------------------
1 | # How to create new environments for Gym
2 |
3 | * Create a new repo called gym-foo, which should also be a PIP package.
4 |
5 | * A good example is https://github.com/openai/gym-soccer.
6 |
7 | * It should have at least the following files:
8 | ```sh
9 | gym-foo/
10 | README.md
11 | setup.py
12 | gym_foo/
13 | __init__.py
14 | envs/
15 | __init__.py
16 | foo_env.py
17 | foo_extrahard_env.py
18 | ```
19 |
20 | * `gym-foo/setup.py` should have:
21 |
22 | ```python
23 | from setuptools import setup
24 |
25 | setup(name='gym_foo',
26 | version='0.0.1',
27 | install_requires=['gym'] # And any other dependencies foo needs
28 | )
29 | ```
30 |
31 | * `gym-foo/gym_foo/__init__.py` should have:
32 | ```python
33 | from gym.envs.registration import register
34 |
35 | register(
36 | id='foo-v0',
37 | entry_point='gym_foo.envs:FooEnv',
38 | )
39 | register(
40 | id='foo-extrahard-v0',
41 | entry_point='gym_foo.envs:FooExtraHardEnv',
42 | )
43 | ```
44 |
45 | * `gym-foo/gym_foo/envs/__init__.py` should have:
46 | ```python
47 | from gym_foo.envs.foo_env import FooEnv
48 | from gym_foo.envs.foo_extrahard_env import FooExtraHardEnv
49 | ```
50 |
51 | * `gym-foo/gym_foo/envs/foo_env.py` should look something like:
52 | ```python
53 | import gym
54 | from gym import error, spaces, utils
55 | from gym.utils import seeding
56 |
57 | class FooEnv(gym.Env):
58 | metadata = {'render.modes': ['human']}
59 |
60 | def __init__(self):
61 | ...
62 | def step(self, action):
63 | ...
64 | def reset(self):
65 | ...
66 | def render(self, mode='human'):
67 | ...
68 | def close(self):
69 | ...
70 | ```
71 |
72 | * After you have installed your package with `pip install -e gym-foo`, you can create an instance of the environment with `gym.make('gym_foo:foo-v0')`
73 |
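74 | * As a quick sanity check (assuming your `FooEnv` defines an `action_space` and fills in the `reset` and `step` stubs above), the registered environment can then be driven like any other Gym environment:
75 | 
76 | ```python
77 | import gym
78 | 
79 | # 'gym_foo:foo-v0' is the id registered in gym_foo/__init__.py above
80 | env = gym.make('gym_foo:foo-v0')
81 | observation = env.reset()
82 | observation, reward, done, info = env.step(env.action_space.sample())
83 | env.close()
84 | ```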
--------------------------------------------------------------------------------
/gym/envs/mujoco/humanoidstandup.py:
--------------------------------------------------------------------------------
1 | from gym.envs.mujoco import mujoco_env
2 | from gym import utils
3 | import numpy as np
4 |
5 | class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
8 | utils.EzPickle.__init__(self)
9 |
10 | def _get_obs(self):
11 | data = self.sim.data
12 | return np.concatenate([data.qpos.flat[2:],
13 | data.qvel.flat,
14 | data.cinert.flat,
15 | data.cvel.flat,
16 | data.qfrc_actuator.flat,
17 | data.cfrc_ext.flat])
18 |
19 | def step(self, a):
20 | self.do_simulation(a, self.frame_skip)
21 | pos_after = self.sim.data.qpos[2]
22 | data = self.sim.data
23 | uph_cost = (pos_after - 0) / self.model.opt.timestep
24 |
25 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
26 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
27 | quad_impact_cost = min(quad_impact_cost, 10)
28 | reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
29 |
30 | done = bool(False)
31 | return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost)
32 |
33 | def reset_model(self):
34 | c = 0.01
35 | self.set_state(
36 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
37 | self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
38 | )
39 | return self._get_obs()
40 |
41 | def viewer_setup(self):
42 | self.viewer.cam.trackbodyid = 1
43 | self.viewer.cam.distance = self.model.stat.extent * 1.0
44 | self.viewer.cam.lookat[2] = 0.8925
45 | self.viewer.cam.elevation = -20
46 |
--------------------------------------------------------------------------------
/gym/wrappers/test_transform_reward.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import TransformReward
7 |
8 |
9 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0'])
10 | def test_transform_reward(env_id):
11 | # use case #1: scale
12 | scales = [0.1, 200]
13 | for scale in scales:
14 | env = gym.make(env_id)
15 | wrapped_env = TransformReward(gym.make(env_id), lambda r: scale*r)
16 | action = env.action_space.sample()
17 |
18 | env.seed(0)
19 | env.reset()
20 | wrapped_env.seed(0)
21 | wrapped_env.reset()
22 |
23 | _, reward, _, _ = env.step(action)
24 | _, wrapped_reward, _, _ = wrapped_env.step(action)
25 |
26 | assert wrapped_reward == scale*reward
27 | del env, wrapped_env
28 |
29 | # use case #2: clip
30 | min_r = -0.0005
31 | max_r = 0.0002
32 | env = gym.make(env_id)
33 | wrapped_env = TransformReward(gym.make(env_id), lambda r: np.clip(r, min_r, max_r))
34 | action = env.action_space.sample()
35 |
36 | env.seed(0)
37 | env.reset()
38 | wrapped_env.seed(0)
39 | wrapped_env.reset()
40 |
41 | _, reward, _, _ = env.step(action)
42 | _, wrapped_reward, _, _ = wrapped_env.step(action)
43 |
44 | assert abs(wrapped_reward) < abs(reward)
45 | assert wrapped_reward == -0.0005 or wrapped_reward == 0.0002
46 | del env, wrapped_env
47 |
48 | # use case #3: sign
49 | env = gym.make(env_id)
50 | wrapped_env = TransformReward(gym.make(env_id), lambda r: np.sign(r))
51 |
52 | env.seed(0)
53 | env.reset()
54 | wrapped_env.seed(0)
55 | wrapped_env.reset()
56 |
57 | for _ in range(1000):
58 | action = env.action_space.sample()
59 | _, wrapped_reward, done, _ = wrapped_env.step(action)
60 | assert wrapped_reward in [-1.0, 0.0, 1.0]
61 | if done:
62 | break
63 | del env, wrapped_env
64 |
--------------------------------------------------------------------------------
/examples/agents/random_agent.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 |
4 | import gym
5 | from gym import wrappers, logger
6 |
7 | class RandomAgent(object):
8 | """The world's simplest agent!"""
9 | def __init__(self, action_space):
10 | self.action_space = action_space
11 |
12 | def act(self, observation, reward, done):
13 | return self.action_space.sample()
14 |
15 | if __name__ == '__main__':
16 | parser = argparse.ArgumentParser(description=None)
17 | parser.add_argument('env_id', nargs='?', default='CartPole-v0', help='Select the environment to run')
18 | args = parser.parse_args()
19 |
20 | # You can set the level to logger.DEBUG or logger.WARN if you
21 | # want to change the amount of output.
22 | logger.set_level(logger.INFO)
23 |
24 | env = gym.make(args.env_id)
25 |
26 | # You provide the directory to write to (can be an existing
27 | # directory, including one with existing data -- all monitor files
28 | # will be namespaced). You can also dump to a tempdir if you'd
29 | # like: tempfile.mkdtemp().
30 | outdir = '/tmp/random-agent-results'
31 | env = wrappers.Monitor(env, directory=outdir, force=True)
32 | env.seed(0)
33 | agent = RandomAgent(env.action_space)
34 |
35 | episode_count = 100
36 | reward = 0
37 | done = False
38 |
39 | for i in range(episode_count):
40 | ob = env.reset()
41 | while True:
42 | action = agent.act(ob, reward, done)
43 | ob, reward, done, _ = env.step(action)
44 | if done:
45 | break
46 |             # Note there's no env.render() here. But the environment can still open a window and
47 | # render if asked by env.monitor: it calls env.render('rgb_array') to record video.
48 | # Video is not recorded every episode, see capped_cubic_video_schedule for details.
49 |
50 | # Close the env and write monitor result info to disk
51 | env.close()
52 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/inverted_double_pendulum.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/mujoco/pusher.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | import mujoco_py
6 |
7 | class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
8 | def __init__(self):
9 | utils.EzPickle.__init__(self)
10 | mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', 5)
11 |
12 | def step(self, a):
13 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
14 | vec_2 = self.get_body_com("object") - self.get_body_com("goal")
15 |
16 | reward_near = - np.linalg.norm(vec_1)
17 | reward_dist = - np.linalg.norm(vec_2)
18 | reward_ctrl = - np.square(a).sum()
19 | reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
20 |
21 | self.do_simulation(a, self.frame_skip)
22 | ob = self._get_obs()
23 | done = False
24 | return ob, reward, done, dict(reward_dist=reward_dist,
25 | reward_ctrl=reward_ctrl)
26 |
27 | def viewer_setup(self):
28 | self.viewer.cam.trackbodyid = -1
29 | self.viewer.cam.distance = 4.0
30 |
31 | def reset_model(self):
32 | qpos = self.init_qpos
33 |
34 | self.goal_pos = np.asarray([0, 0])
35 | while True:
36 | self.cylinder_pos = np.concatenate([
37 | self.np_random.uniform(low=-0.3, high=0, size=1),
38 | self.np_random.uniform(low=-0.2, high=0.2, size=1)])
39 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
40 | break
41 |
42 | qpos[-4:-2] = self.cylinder_pos
43 | qpos[-2:] = self.goal_pos
44 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
45 | high=0.005, size=self.model.nv)
46 | qvel[-4:] = 0
47 | self.set_state(qpos, qvel)
48 | return self._get_obs()
49 |
50 | def _get_obs(self):
51 | return np.concatenate([
52 | self.sim.data.qpos.flat[:7],
53 | self.sim.data.qvel.flat[:7],
54 | self.get_body_com("tips_arm"),
55 | self.get_body_com("object"),
56 | self.get_body_com("goal"),
57 | ])
58 |
--------------------------------------------------------------------------------
/gym/vector/tests/test_spaces.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from gym.spaces import Box, MultiDiscrete, Tuple, Dict
5 | from gym.vector.tests.utils import spaces
6 |
7 | from gym.vector.utils.spaces import _BaseGymSpaces, batch_space
8 |
9 | expected_batch_spaces_4 = [
10 | Box(low=-1., high=1., shape=(4,), dtype=np.float64),
11 | Box(low=0., high=10., shape=(4, 1), dtype=np.float32),
12 | Box(low=np.array([[-1., 0., 0.], [-1., 0., 0.], [-1., 0., 0.], [-1., 0., 0.]]),
13 | high=np.array([[1., 1., 1.], [1., 1., 1.], [1., 1., 1.], [1., 1., 1.]]), dtype=np.float32),
14 | Box(low=np.array([[[-1., 0.], [0., -1.]], [[-1., 0.], [0., -1.]], [[-1., 0.], [0., -1]],
15 | [[-1., 0.], [0., -1.]]]), high=np.ones((4, 2, 2)), dtype=np.float32),
16 | Box(low=0, high=255, shape=(4,), dtype=np.uint8),
17 | Box(low=0, high=255, shape=(4, 32, 32, 3), dtype=np.uint8),
18 | MultiDiscrete([2, 2, 2, 2]),
19 | Tuple((MultiDiscrete([3, 3, 3, 3]), MultiDiscrete([5, 5, 5, 5]))),
20 | Tuple((MultiDiscrete([7, 7, 7, 7]), Box(low=np.array([[0., -1.], [0., -1.], [0., -1.], [0., -1]]),
21 | high=np.array([[1., 1.], [1., 1.], [1., 1.], [1., 1.]]), dtype=np.float32))),
22 | Box(low=np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]),
23 | high=np.array([[10, 12, 16], [10, 12, 16], [10, 12, 16], [10, 12, 16]]), dtype=np.int64),
24 | Box(low=0, high=1, shape=(4, 19), dtype=np.int8),
25 | Dict({
26 | 'position': MultiDiscrete([23, 23, 23, 23]),
27 | 'velocity': Box(low=0., high=1., shape=(4, 1), dtype=np.float32)
28 | }),
29 | Dict({
30 | 'position': Dict({'x': MultiDiscrete([29, 29, 29, 29]), 'y': MultiDiscrete([31, 31, 31, 31])}),
31 | 'velocity': Tuple((MultiDiscrete([37, 37, 37, 37]), Box(low=0, high=255, shape=(4,), dtype=np.uint8)))
32 | })
33 | ]
34 |
35 | @pytest.mark.parametrize('space,expected_batch_space_4', list(zip(spaces,
36 | expected_batch_spaces_4)), ids=[space.__class__.__name__ for space in spaces])
37 | def test_batch_space(space, expected_batch_space_4):
38 | batch_space_4 = batch_space(space, n=4)
39 | assert batch_space_4 == expected_batch_space_4
40 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/shared_asset.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/utils/atomic_write.py:
--------------------------------------------------------------------------------
1 | # Based on http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python
2 |
3 | import os
4 | from contextlib import contextmanager
5 |
6 | # We would ideally atomically replace any existing file with the new
7 | # version. However, on Windows there's no Python-only solution prior
8 | # to Python 3.3. (This library includes a C extension to do so:
9 | # https://pypi.python.org/pypi/pyosreplace/0.1.)
10 | #
11 | # Correspondingly, we make a best effort, but on Python < 3.3 use a
12 | # replace method which could result in the file temporarily
13 | # disappearing.
14 | import sys
15 | if sys.version_info >= (3, 3):
16 | # Python 3.3 and up have a native `replace` method
17 | from os import replace
18 | elif sys.platform.startswith("win"):
19 | def replace(src, dst):
20 | # TODO: on Windows, this will raise if the file is in use,
21 | # which is possible. We'll need to make this more robust over
22 | # time.
23 | try:
24 | os.remove(dst)
25 | except OSError:
26 | pass
27 | os.rename(src, dst)
28 | else:
29 | # POSIX rename() is always atomic
30 | from os import rename as replace
31 |
32 | @contextmanager
33 | def atomic_write(filepath, binary=False, fsync=False):
34 | """ Writeable file object that atomically updates a file (using a temporary file). In some cases (namely Python < 3.3 on Windows), this could result in an existing file being temporarily unlinked.
35 |
36 | :param filepath: the file path to be opened
37 | :param binary: whether to open the file in a binary mode instead of textual
38 | :param fsync: whether to force write the file to disk
39 | """
40 |
41 | tmppath = filepath + '~'
42 | while os.path.isfile(tmppath):
43 | tmppath += '~'
44 | try:
45 | with open(tmppath, 'wb' if binary else 'w') as file:
46 | yield file
47 | if fsync:
48 | file.flush()
49 | os.fsync(file.fileno())
50 | replace(tmppath, filepath)
51 | finally:
52 | try:
53 | os.remove(tmppath)
54 | except (IOError, OSError):
55 | pass
56 |
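A minimal usage sketch of the `atomic_write` context manager defined above (`scores.json` is just an illustrative path):

```python
from gym.utils.atomic_write import atomic_write

# The data is written to a temporary file and only swapped into place
# once the block completes, so readers never observe a partial file.
with atomic_write('scores.json', fsync=True) as f:
    f.write('{"episodes": 100}')
```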
--------------------------------------------------------------------------------
/.github/stale.yml:
--------------------------------------------------------------------------------
1 | # Configuration for probot-stale - https://github.com/probot/stale
2 |
3 | # Number of days of inactivity before an Issue or Pull Request becomes stale
4 | daysUntilStale: 60
5 |
6 | # Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
7 | # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
8 | daysUntilClose: 14
9 |
10 | # Only issues or pull requests with all of these labels are checked for staleness. Defaults to `[]` (disabled)
11 | onlyLabels:
12 | - more-information-needed
13 |
14 | # Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
15 | exemptLabels:
16 | - pinned
17 | - security
18 | - "[Status] Maybe Later"
19 |
20 | # Set to true to ignore issues in a project (defaults to false)
21 | exemptProjects: true
22 |
23 | # Set to true to ignore issues in a milestone (defaults to false)
24 | exemptMilestones: true
25 |
26 | # Set to true to ignore issues with an assignee (defaults to false)
27 | exemptAssignees: true
28 |
29 | # Label to use when marking as stale
30 | staleLabel: stale
31 |
32 | # Comment to post when marking as stale. Set to `false` to disable
33 | markComment: >
34 | This issue has been automatically marked as stale because it has not had
35 | recent activity. It will be closed if no further activity occurs. Thank you
36 | for your contributions.
37 |
38 | # Comment to post when removing the stale label.
39 | # unmarkComment: >
40 | # Your comment here.
41 |
42 | # Comment to post when closing a stale Issue or Pull Request.
43 | # closeComment: >
44 | # Your comment here.
45 |
46 | # Limit the number of actions per hour, from 1-30. Default is 30
47 | limitPerRun: 30
48 |
49 | # Limit to only `issues` or `pulls`
50 | only: issues
51 |
52 | # Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
53 | # pulls:
54 | # daysUntilStale: 30
55 | # markComment: >
56 | # This pull request has been automatically marked as stale because it has not had
57 | # recent activity. It will be closed if no further activity occurs. Thank you
58 | # for your contributions.
59 |
60 | # issues:
61 | # exemptLabels:
62 | # - confirmed
--------------------------------------------------------------------------------
/gym/envs/robotics/hand_env.py:
--------------------------------------------------------------------------------
1 | import os
2 | import copy
3 | import numpy as np
4 |
5 | import gym
6 | from gym import error, spaces
7 | from gym.utils import seeding
8 | from gym.envs.robotics import robot_env
9 |
10 |
11 | class HandEnv(robot_env.RobotEnv):
12 | def __init__(self, model_path, n_substeps, initial_qpos, relative_control):
13 | self.relative_control = relative_control
14 |
15 | super(HandEnv, self).__init__(
16 | model_path=model_path, n_substeps=n_substeps, n_actions=20,
17 | initial_qpos=initial_qpos)
18 |
19 | # RobotEnv methods
20 | # ----------------------------
21 |
22 | def _set_action(self, action):
23 | assert action.shape == (20,)
24 |
25 | ctrlrange = self.sim.model.actuator_ctrlrange
26 | actuation_range = (ctrlrange[:, 1] - ctrlrange[:, 0]) / 2.
27 | if self.relative_control:
28 | actuation_center = np.zeros_like(action)
29 | for i in range(self.sim.data.ctrl.shape[0]):
30 | actuation_center[i] = self.sim.data.get_joint_qpos(
31 | self.sim.model.actuator_names[i].replace(':A_', ':'))
32 | for joint_name in ['FF', 'MF', 'RF', 'LF']:
33 | act_idx = self.sim.model.actuator_name2id(
34 | 'robot0:A_{}J1'.format(joint_name))
35 | actuation_center[act_idx] += self.sim.data.get_joint_qpos(
36 | 'robot0:{}J0'.format(joint_name))
37 | else:
38 | actuation_center = (ctrlrange[:, 1] + ctrlrange[:, 0]) / 2.
39 | self.sim.data.ctrl[:] = actuation_center + action * actuation_range
40 | self.sim.data.ctrl[:] = np.clip(self.sim.data.ctrl, ctrlrange[:, 0], ctrlrange[:, 1])
41 |
42 | def _viewer_setup(self):
43 | body_id = self.sim.model.body_name2id('robot0:palm')
44 | lookat = self.sim.data.body_xpos[body_id]
45 | for idx, value in enumerate(lookat):
46 | self.viewer.cam.lookat[idx] = value
47 | self.viewer.cam.distance = 0.5
48 | self.viewer.cam.azimuth = 55.
49 | self.viewer.cam.elevation = -25.
50 |
51 | def render(self, mode='human', width=500, height=500):
52 | return super(HandEnv, self).render(mode, width, height)
53 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | import sys, os.path
3 |
4 | # Don't import gym module here, since deps may not be installed
5 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'gym'))
6 | from version import VERSION
7 |
8 | # Environment-specific dependencies.
9 | extras = {
10 | 'atari': ['atari_py~=0.2.0', 'Pillow', 'opencv-python'],
11 | 'box2d': ['box2d-py~=2.3.5'],
12 | 'classic_control': [],
13 | 'mujoco': ['mujoco_py>=1.50, <2.0', 'imageio'],
14 | 'robotics': ['mujoco_py>=1.50, <2.0', 'imageio'],
15 | }
16 |
17 | # Meta dependency groups.
18 | extras['all'] = [item for group in extras.values() for item in group]
19 |
20 | setup(name='gym',
21 | version=VERSION,
22 | description='The OpenAI Gym: A toolkit for developing and comparing your reinforcement learning agents.',
23 | url='https://github.com/openai/gym',
24 | author='OpenAI',
25 | author_email='gym@openai.com',
26 | license='',
27 | packages=[package for package in find_packages()
28 | if package.startswith('gym')],
29 | zip_safe=False,
30 | install_requires=[
31 | 'scipy', 'numpy>=1.10.4', 'six', 'pyglet>=1.2.0,<=1.3.2', 'cloudpickle~=1.2.0',
32 | 'enum34~=1.1.6;python_version<"3.4"', 'opencv-python'
33 | ],
34 | extras_require=extras,
35 | package_data={'gym': [
36 | 'envs/mujoco/assets/*.xml',
37 | 'envs/classic_control/assets/*.png',
38 | 'envs/robotics/assets/LICENSE.md',
39 | 'envs/robotics/assets/fetch/*.xml',
40 | 'envs/robotics/assets/hand/*.xml',
41 | 'envs/robotics/assets/stls/fetch/*.stl',
42 | 'envs/robotics/assets/stls/hand/*.stl',
43 | 'envs/robotics/assets/textures/*.png']
44 | },
45 | tests_require=['pytest', 'mock'],
46 | python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*',
47 | classifiers=[
48 | 'Programming Language :: Python :: 2',
49 | 'Programming Language :: Python :: 2.7',
50 | 'Programming Language :: Python :: 3',
51 | 'Programming Language :: Python :: 3.5',
52 | 'Programming Language :: Python :: 3.6',
53 | 'Programming Language :: Python :: 3.7',
54 | ],
55 | )
56 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/humanoid.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym.envs.mujoco import mujoco_env
3 | from gym import utils
4 |
5 | def mass_center(model, sim):
6 | mass = np.expand_dims(model.body_mass, 1)
7 | xpos = sim.data.xipos
8 | return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
9 |
10 | class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
11 | def __init__(self):
12 | mujoco_env.MujocoEnv.__init__(self, 'humanoid.xml', 5)
13 | utils.EzPickle.__init__(self)
14 |
15 | def _get_obs(self):
16 | data = self.sim.data
17 | return np.concatenate([data.qpos.flat[2:],
18 | data.qvel.flat,
19 | data.cinert.flat,
20 | data.cvel.flat,
21 | data.qfrc_actuator.flat,
22 | data.cfrc_ext.flat])
23 |
24 | def step(self, a):
25 | pos_before = mass_center(self.model, self.sim)
26 | self.do_simulation(a, self.frame_skip)
27 | pos_after = mass_center(self.model, self.sim)
28 | alive_bonus = 5.0
29 | data = self.sim.data
30 | lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt
31 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
32 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
33 | quad_impact_cost = min(quad_impact_cost, 10)
34 | reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
35 | qpos = self.sim.data.qpos
36 | done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
37 | return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
38 |
39 | def reset_model(self):
40 | c = 0.01
41 | self.set_state(
42 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
43 | self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
44 | )
45 | return self._get_obs()
46 |
47 | def viewer_setup(self):
48 | self.viewer.cam.trackbodyid = 1
49 | self.viewer.cam.distance = self.model.stat.extent * 1.0
50 | self.viewer.cam.lookat[2] = 2.0
51 | self.viewer.cam.elevation = -20
52 |
--------------------------------------------------------------------------------
/gym/utils/closer.py:
--------------------------------------------------------------------------------
1 | import atexit
2 | import threading
3 | import weakref
4 |
5 | class Closer(object):
6 | """A registry that ensures your objects get closed, whether manually,
7 | upon garbage collection, or upon exit. To work properly, your
8 | objects need to cooperate and do something like the following:
9 |
10 | ```
11 | closer = Closer()
12 | class Example(object):
13 | def __init__(self):
14 | self._id = closer.register(self)
15 |
16 | def close(self):
17 | # Probably worth making idempotent too!
18 | ...
19 | closer.unregister(self._id)
20 |
21 | def __del__(self):
22 | self.close()
23 | ```
24 |
25 | That is, your objects should:
26 |
27 | - register() themselves and save the returned ID
28 | - unregister() themselves upon close()
29 | - include a __del__ method which close()'s the object
30 | """
31 |
32 | def __init__(self, atexit_register=True):
33 | self.lock = threading.Lock()
34 | self.next_id = -1
35 | self.closeables = weakref.WeakValueDictionary()
36 |
37 | if atexit_register:
38 | atexit.register(self.close)
39 |
40 | def generate_next_id(self):
41 | with self.lock:
42 | self.next_id += 1
43 | return self.next_id
44 |
45 | def register(self, closeable):
46 | """Registers an object with a 'close' method.
47 |
48 | Returns:
49 | int: The registration ID of this object. It is the caller's responsibility to save this ID if early closing is desired.
50 | """
51 | assert hasattr(closeable, 'close'), 'No close method for {}'.format(closeable)
52 |
53 | next_id = self.generate_next_id()
54 | self.closeables[next_id] = closeable
55 | return next_id
56 |
57 | def unregister(self, id):
58 | assert id is not None
59 | if id in self.closeables:
60 | del self.closeables[id]
61 |
62 | def close(self):
63 | # Explicitly fetch all monitors first so that they can't disappear while
64 | # we iterate. cf. http://stackoverflow.com/a/12429620
65 | closeables = list(self.closeables.values())
66 | for closeable in closeables:
67 | closeable.close()
68 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/thrower.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | utils.EzPickle.__init__(self)
8 | self._ball_hit_ground = False
9 | self._ball_hit_location = None
10 | mujoco_env.MujocoEnv.__init__(self, 'thrower.xml', 5)
11 |
12 | def step(self, a):
13 | ball_xy = self.get_body_com("ball")[:2]
14 | goal_xy = self.get_body_com("goal")[:2]
15 |
16 | if not self._ball_hit_ground and self.get_body_com("ball")[2] < -0.25:
17 | self._ball_hit_ground = True
18 | self._ball_hit_location = self.get_body_com("ball")
19 |
20 | if self._ball_hit_ground:
21 | ball_hit_xy = self._ball_hit_location[:2]
22 | reward_dist = -np.linalg.norm(ball_hit_xy - goal_xy)
23 | else:
24 | reward_dist = -np.linalg.norm(ball_xy - goal_xy)
25 | reward_ctrl = - np.square(a).sum()
26 |
27 | reward = reward_dist + 0.002 * reward_ctrl
28 | self.do_simulation(a, self.frame_skip)
29 | ob = self._get_obs()
30 | done = False
31 | return ob, reward, done, dict(reward_dist=reward_dist,
32 | reward_ctrl=reward_ctrl)
33 |
34 | def viewer_setup(self):
35 | self.viewer.cam.trackbodyid = 0
36 | self.viewer.cam.distance = 4.0
37 |
38 | def reset_model(self):
39 | self._ball_hit_ground = False
40 | self._ball_hit_location = None
41 |
42 | qpos = self.init_qpos
43 | self.goal = np.array([self.np_random.uniform(low=-0.3, high=0.3),
44 | self.np_random.uniform(low=-0.3, high=0.3)])
45 |
46 | qpos[-9:-7] = self.goal
47 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
48 | high=0.005, size=self.model.nv)
49 | qvel[7:] = 0
50 | self.set_state(qpos, qvel)
51 | return self._get_obs()
52 |
53 | def _get_obs(self):
54 | return np.concatenate([
55 | self.sim.data.qpos.flat[:7],
56 | self.sim.data.qvel.flat[:7],
57 | self.get_body_com("r_wrist_roll_link"),
58 | self.get_body_com("ball"),
59 | self.get_body_com("goal"),
60 | ])
61 |
--------------------------------------------------------------------------------
/gym/vector/tests/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import gym
3 | import time
4 |
5 | from gym.spaces import Box, Discrete, MultiDiscrete, MultiBinary, Tuple, Dict
6 |
7 | spaces = [
8 | Box(low=np.array(-1.), high=np.array(1.), dtype=np.float64),
9 | Box(low=np.array([0.]), high=np.array([10.]), dtype=np.float32),
10 | Box(low=np.array([-1., 0., 0.]), high=np.array([1., 1., 1.]), dtype=np.float32),
11 | Box(low=np.array([[-1., 0.], [0., -1.]]), high=np.ones((2, 2)), dtype=np.float32),
12 | Box(low=0, high=255, shape=(), dtype=np.uint8),
13 | Box(low=0, high=255, shape=(32, 32, 3), dtype=np.uint8),
14 | Discrete(2),
15 | Tuple((Discrete(3), Discrete(5))),
16 | Tuple((Discrete(7), Box(low=np.array([0., -1.]), high=np.array([1., 1.]), dtype=np.float32))),
17 | MultiDiscrete([11, 13, 17]),
18 | MultiBinary(19),
19 | Dict({
20 | 'position': Discrete(23),
21 | 'velocity': Box(low=np.array([0.]), high=np.array([1.]), dtype=np.float32)
22 | }),
23 | Dict({
24 | 'position': Dict({'x': Discrete(29), 'y': Discrete(31)}),
25 | 'velocity': Tuple((Discrete(37), Box(low=0, high=255, shape=(), dtype=np.uint8)))
26 | })
27 | ]
28 |
29 | HEIGHT, WIDTH = 64, 64
30 |
31 | class UnittestSlowEnv(gym.Env):
32 | def __init__(self, slow_reset=0.3):
33 | super(UnittestSlowEnv, self).__init__()
34 | self.slow_reset = slow_reset
35 | self.observation_space = Box(low=0, high=255,
36 | shape=(HEIGHT, WIDTH, 3), dtype=np.uint8)
37 | self.action_space = Box(low=0., high=1., shape=(), dtype=np.float32)
38 |
39 | def reset(self):
40 | if self.slow_reset > 0:
41 | time.sleep(self.slow_reset)
42 | return self.observation_space.sample()
43 |
44 | def step(self, action):
45 | time.sleep(action)
46 | observation = self.observation_space.sample()
47 | reward, done = 0., False
48 | return observation, reward, done, {}
49 |
50 | def make_env(env_name, seed):
51 | def _make():
52 | env = gym.make(env_name)
53 | env.seed(seed)
54 | return env
55 | return _make
56 |
57 | def make_slow_env(slow_reset, seed):
58 | def _make():
59 | env = UnittestSlowEnv(slow_reset=slow_reset)
60 | env.seed(seed)
61 | return env
62 | return _make
63 |
--------------------------------------------------------------------------------
/examples/scripts/sim_env:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import gym
3 | from gym import spaces, envs
4 | import argparse
5 | import numpy as np
6 | import itertools
7 | import time
8 | from builtins import input
9 |
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument("env")
12 | parser.add_argument("--mode", choices=["noop", "random", "human"],
13 | default="random")
14 | parser.add_argument("--max_steps", type=int, default=0)
15 | parser.add_argument("--fps",type=float)
16 | parser.add_argument("--once", action="store_true")
17 | parser.add_argument("--ignore_done", action="store_true")
18 | args = parser.parse_args()
19 |
20 | env = envs.make(args.env)
21 | ac_space = env.action_space
22 |
23 | fps = args.fps or env.metadata.get('video.frames_per_second') or 100
24 | if args.max_steps == 0: args.max_steps = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']
25 |
26 | while True:
27 | env.reset()
28 | env.render(mode='human')
29 | print("Starting a new trajectory")
30 | for t in range(args.max_steps) if args.max_steps else itertools.count():
31 | done = False
32 | if args.mode == "noop":
33 | if isinstance(ac_space, spaces.Box):
34 | a = np.zeros(ac_space.shape)
35 | elif isinstance(ac_space, spaces.Discrete):
36 | a = 0
37 | else:
38 | raise NotImplementedError("noop not implemented for class {}".format(type(ac_space)))
39 | time.sleep(1.0/fps)
40 | elif args.mode == "random":
41 | a = ac_space.sample()
42 | time.sleep(1.0/fps)
43 | elif args.mode == "human":
44 | a = input("type action from {0,...,%i} and press enter: "%(ac_space.n-1))
45 | try:
46 | a = int(a)
47 | except ValueError:
48 | print("WARNING: ignoring illegal action '{}'.".format(a))
49 | a = 0
50 | if a >= ac_space.n:
51 | print("WARNING: ignoring illegal action {}.".format(a))
52 | a = 0
53 | _, _, done, _ = env.step(a)
54 |
55 | env.render()
56 | if done and not args.ignore_done:
57 | break
58 | print("Done after {} steps".format(t+1))
59 | if args.once:
60 | break
61 | else:
62 | input("Press enter to continue")
63 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/manipulate_pen.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/tests/test_registration.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import gym
3 | from gym import error, envs
4 | from gym.envs import registration
5 | from gym.envs.classic_control import cartpole
6 |
7 | class ArgumentEnv(gym.Env):
8 | def __init__(self, arg1, arg2, arg3):
9 | self.arg1 = arg1
10 | self.arg2 = arg2
11 | self.arg3 = arg3
12 |
13 | gym.register(
14 | id='test.ArgumentEnv-v0',
15 | entry_point='gym.envs.tests.test_registration:ArgumentEnv',
16 | kwargs={
17 | 'arg1': 'arg1',
18 | 'arg2': 'arg2',
19 | }
20 | )
21 |
22 | def test_make():
23 | env = envs.make('CartPole-v0')
24 | assert env.spec.id == 'CartPole-v0'
25 | assert isinstance(env.unwrapped, cartpole.CartPoleEnv)
26 |
27 | def test_make_with_kwargs():
28 | env = envs.make('test.ArgumentEnv-v0', arg2='override_arg2', arg3='override_arg3')
29 | assert env.spec.id == 'test.ArgumentEnv-v0'
30 | assert isinstance(env.unwrapped, ArgumentEnv)
31 | assert env.arg1 == 'arg1'
32 | assert env.arg2 == 'override_arg2'
33 | assert env.arg3 == 'override_arg3'
34 |
35 | def test_make_deprecated():
36 | try:
37 | envs.make('Humanoid-v0')
38 | except error.Error:
39 | pass
40 | else:
41 | assert False
42 |
43 | def test_spec():
44 | spec = envs.spec('CartPole-v0')
45 | assert spec.id == 'CartPole-v0'
46 |
47 | def test_missing_lookup():
48 | registry = registration.EnvRegistry()
49 | registry.register(id='Test-v0', entry_point=None)
50 | registry.register(id='Test-v15', entry_point=None)
51 | registry.register(id='Test-v9', entry_point=None)
52 | registry.register(id='Other-v100', entry_point=None)
53 | try:
54 | registry.spec('Test-v1') # must match an env name but not the version above
55 | except error.DeprecatedEnv:
56 | pass
57 | else:
58 | assert False
59 |
60 | try:
61 | registry.spec('Unknown-v1')
62 | except error.UnregisteredEnv:
63 | pass
64 | else:
65 | assert False
66 |
67 | def test_malformed_lookup():
68 | registry = registration.EnvRegistry()
69 | try:
70 | registry.spec(u'“Breakout-v0”')
71 | except error.Error as e:
72 | assert 'malformed environment ID' in '{}'.format(e), 'Unexpected message: {}'.format(e)
73 | else:
74 | assert False
75 |
--------------------------------------------------------------------------------
/gym/vector/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 | from collections.abc import Iterable
3 | except ImportError:
4 | Iterable = (tuple, list)
5 |
6 | from gym.vector.async_vector_env import AsyncVectorEnv
7 | from gym.vector.sync_vector_env import SyncVectorEnv
8 | from gym.vector.vector_env import VectorEnv
9 |
10 | __all__ = ['AsyncVectorEnv', 'SyncVectorEnv', 'VectorEnv', 'make']
11 |
12 | def make(id, num_envs=1, asynchronous=True, wrappers=None, **kwargs):
13 | """Create a vectorized environment from multiple copies of an environment,
14 |     given its id.
15 |
16 | Parameters
17 | ----------
18 | id : str
19 | The environment ID. This must be a valid ID from the registry.
20 |
21 | num_envs : int
22 | Number of copies of the environment.
23 |
24 | asynchronous : bool (default: `True`)
25 | If `True`, wraps the environments in an `AsyncVectorEnv` (which uses
26 | `multiprocessing` to run the environments in parallel). If `False`,
27 | wraps the environments in a `SyncVectorEnv`.
28 |
29 | wrappers : Callable or Iterable of Callables (default: `None`)
30 | If not `None`, then apply the wrappers to each internal
31 | environment during creation.
32 |
33 | Returns
34 | -------
35 | env : `gym.vector.VectorEnv` instance
36 | The vectorized environment.
37 |
38 | Example
39 | -------
40 | >>> import gym
41 | >>> env = gym.vector.make('CartPole-v1', 3)
42 | >>> env.reset()
43 | array([[-0.04456399, 0.04653909, 0.01326909, -0.02099827],
44 | [ 0.03073904, 0.00145001, -0.03088818, -0.03131252],
45 | [ 0.03468829, 0.01500225, 0.01230312, 0.01825218]],
46 | dtype=float32)
47 | """
48 | from gym.envs import make as make_
49 | def _make_env():
50 | env = make_(id, **kwargs)
51 | if wrappers is not None:
52 | if callable(wrappers):
53 | env = wrappers(env)
54 | elif isinstance(wrappers, Iterable) and all([callable(w) for w in wrappers]):
55 | for wrapper in wrappers:
56 | env = wrapper(env)
57 | else:
58 | raise NotImplementedError
59 | return env
60 | env_fns = [_make_env for _ in range(num_envs)]
61 | return AsyncVectorEnv(env_fns) if asynchronous else SyncVectorEnv(env_fns)
62 |
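A short sketch of the `wrappers` argument documented above; `TransformReward` is used purely as an illustrative wrapper (it is tested elsewhere in this repo):

```python
import gym
from gym.wrappers import TransformReward

# Each of the 4 internal CartPole copies is wrapped at creation time.
env = gym.vector.make('CartPole-v1', num_envs=4, asynchronous=False,
                      wrappers=lambda e: TransformReward(e, lambda r: 0.1 * r))
observations = env.reset()  # batched: shape (4,) + single observation shape
```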
--------------------------------------------------------------------------------
/gym/envs/toy_text/nchain.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import spaces
3 | from gym.utils import seeding
4 |
5 | class NChainEnv(gym.Env):
6 | """n-Chain environment
7 |
8 | This game presents moves along a linear chain of states, with two actions:
9 | 0) forward, which moves along the chain but returns no reward
10 | 1) backward, which returns to the beginning and has a small reward
11 |
12 | The end of the chain, however, presents a large reward, and by moving
13 | 'forward' at the end of the chain this large reward can be repeated.
14 |
15 | At each action, there is a small probability that the agent 'slips' and the
16 | opposite transition is instead taken.
17 |
18 | The observed state is the current state in the chain (0 to n-1).
19 |
20 | This environment is described in section 6.1 of:
21 | A Bayesian Framework for Reinforcement Learning by Malcolm Strens (2000)
22 | http://ceit.aut.ac.ir/~shiry/lecture/machine-learning/papers/BRL-2000.pdf
23 | """
24 | def __init__(self, n=5, slip=0.2, small=2, large=10):
25 | self.n = n
26 | self.slip = slip # probability of 'slipping' an action
27 | self.small = small # payout for 'backwards' action
28 | self.large = large # payout at end of chain for 'forwards' action
29 | self.state = 0 # Start at beginning of the chain
30 | self.action_space = spaces.Discrete(2)
31 | self.observation_space = spaces.Discrete(self.n)
32 | self.seed()
33 |
34 | def seed(self, seed=None):
35 | self.np_random, seed = seeding.np_random(seed)
36 | return [seed]
37 |
38 | def step(self, action):
39 | assert self.action_space.contains(action)
40 | if self.np_random.rand() < self.slip:
41 | action = not action # agent slipped, reverse action taken
42 | if action: # 'backwards': go back to the beginning, get small reward
43 | reward = self.small
44 | self.state = 0
45 | elif self.state < self.n - 1: # 'forwards': go up along the chain
46 | reward = 0
47 | self.state += 1
48 | else: # 'forwards': stay at the end of the chain, collect large reward
49 | reward = self.large
50 | done = False
51 | return self.state, reward, done, {}
52 |
53 | def reset(self):
54 | self.state = 0
55 | return self.state
56 |
--------------------------------------------------------------------------------
/gym/spaces/multi_discrete.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .space import Space
3 |
4 |
5 | class MultiDiscrete(Space):
6 | """
7 |     - The multi-discrete action space consists of a series of discrete action spaces, each with a different number of actions
8 |     - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
9 |     - It is parametrized by passing an array of positive integers specifying the number of actions for each discrete action space
10 |
11 | Note: Some environment wrappers assume a value of 0 always represents the NOOP action.
12 |
13 | e.g. Nintendo Game Controller
14 | - Can be conceptualized as 3 discrete action spaces:
15 |
16 | 1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
17 | 2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
18 | 3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
19 |
20 | - Can be initialized as
21 |
22 | MultiDiscrete([ 5, 2, 2 ])
23 |
24 | """
25 | def __init__(self, nvec):
26 |
27 | """
28 | nvec: vector of counts of each categorical variable
29 | """
30 | assert (np.array(nvec) > 0).all(), 'nvec (counts) have to be positive'
31 | self.nvec = np.asarray(nvec, dtype=np.int64)
32 |
33 | super(MultiDiscrete, self).__init__(self.nvec.shape, np.int64)
34 |
35 | def sample(self):
36 | return (self.np_random.random_sample(self.nvec.shape)*self.nvec).astype(self.dtype)
37 |
38 | def contains(self, x):
39 | if isinstance(x, list):
40 | x = np.array(x) # Promote list to array for contains check
41 | # if nvec is uint32 and space dtype is uint32, then 0 <= x < self.nvec guarantees that x
42 | # is within correct bounds for space dtype (even though x does not have to be unsigned)
43 | return x.shape == self.shape and (0 <= x).all() and (x < self.nvec).all()
44 |
45 | def to_jsonable(self, sample_n):
46 | return [sample.tolist() for sample in sample_n]
47 |
48 | def from_jsonable(self, sample_n):
49 | return np.array(sample_n)
50 |
51 | def __repr__(self):
52 | return "MultiDiscrete({})".format(self.nvec)
53 |
54 | def __eq__(self, other):
55 | return isinstance(other, MultiDiscrete) and np.all(self.nvec == other.nvec)
56 |
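A tiny sketch of the space described in the docstring above, using its game-controller example:

```python
from gym.spaces import MultiDiscrete

space = MultiDiscrete([5, 2, 2])   # arrow keys, button A, button B
action = space.sample()            # e.g. array([3, 0, 1])
assert space.contains(action)
assert space.contains([0, 0, 0])   # NOOP on every sub-space
```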
--------------------------------------------------------------------------------
/gym/wrappers/test_atari_preprocessing.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import gym
3 | from gym.wrappers import AtariPreprocessing
4 | import pytest
5 |
6 | pytest.importorskip('atari_py')
7 |
8 |
9 | @pytest.fixture(scope='module')
10 | def env_fn():
11 | return lambda: gym.make('PongNoFrameskip-v4')
12 |
13 |
14 | def test_atari_preprocessing_grayscale(env_fn):
15 | import cv2
16 | env1 = env_fn()
17 | env2 = AtariPreprocessing(env_fn(), screen_size=84, grayscale_obs=True, frame_skip=1, noop_max=0)
18 | env3 = AtariPreprocessing(env_fn(), screen_size=84, grayscale_obs=False, frame_skip=1, noop_max=0)
19 | env1.seed(0)
20 | env2.seed(0)
21 | env3.seed(0)
22 | obs1 = env1.reset()
23 | obs2 = env2.reset()
24 | obs3 = env3.reset()
25 | assert obs1.shape == (210, 160, 3)
26 | assert obs2.shape == (84, 84)
27 | assert obs3.shape == (84, 84, 3)
28 | assert np.allclose(obs3, cv2.resize(obs1, (84, 84), interpolation=cv2.INTER_AREA))
29 | obs3_gray = cv2.cvtColor(obs3, cv2.COLOR_RGB2GRAY)
30 | # the edges of the numbers do not render quite the same in the grayscale, so we ignore them
31 | assert np.allclose(obs2[10:38], obs3_gray[10:38])
32 |     # the paddle also does not render quite the same
33 | assert np.allclose(obs2[44:], obs3_gray[44:])
34 |
35 | env1.close()
36 | env2.close()
37 | env3.close()
38 |
39 |
40 | def test_atari_preprocessing_scale(env_fn):
41 |     # arbitrarily chosen number of steps to take while checking that all observations stay in the required range
42 | max_test_steps = 10
43 |
44 | for grayscale in [True, False]:
45 | for scaled in [True, False]:
46 | env = AtariPreprocessing(env_fn(), screen_size=84, grayscale_obs=grayscale, scale_obs=scaled,
47 | frame_skip=1, noop_max=0)
48 | obs = env.reset().flatten()
49 | done, step_i = False, 0
50 | max_obs = 1 if scaled else 255
51 | assert (0 <= obs).all() and (obs <= max_obs).all(), 'Obs. must be in range [0,{}]'.format(max_obs)
52 | while not done or step_i <= max_test_steps:
53 | obs, _, done, _ = env.step(env.action_space.sample())
54 | obs = obs.flatten()
55 | assert (0 <= obs).all() and (obs <= max_obs).all(), 'Obs. must be in range [0,{}]'.format(max_obs)
56 | step_i += 1
57 |
58 | env.close()
59 |
--------------------------------------------------------------------------------
/gym/envs/tests/test_envs.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from gym import envs
5 | from gym.envs.tests.spec_list import spec_list
6 |
7 | # This runs a smoketest on each official registered env. We may want
8 | # to try also running environments which are not officially registered
9 | # envs.
10 | @pytest.mark.parametrize("spec", spec_list)
11 | def test_env(spec):
12 | # Capture warnings
13 | with pytest.warns(None) as warnings:
14 | env = spec.make()
15 |
16 | # Check that dtype is explicitly declared for gym.Box spaces
17 | for warning_msg in warnings:
18 | assert not 'autodetected dtype' in str(warning_msg.message)
19 |
20 | ob_space = env.observation_space
21 | act_space = env.action_space
22 | ob = env.reset()
23 | assert ob_space.contains(ob), 'Reset observation: {!r} not in space'.format(ob)
24 | a = act_space.sample()
25 | observation, reward, done, _info = env.step(a)
26 | assert ob_space.contains(observation), 'Step observation: {!r} not in space'.format(observation)
27 | assert np.isscalar(reward), "{} is not a scalar for {}".format(reward, env)
28 | assert isinstance(done, bool), "Expected {} to be a boolean".format(done)
29 |
30 | for mode in env.metadata.get('render.modes', []):
31 | env.render(mode=mode)
32 |
33 |     # Make sure we can render the environment a second time.
34 | for mode in env.metadata.get('render.modes', []):
35 | env.render(mode=mode)
36 |
37 | env.close()
38 |
39 | # Run a longer rollout on some environments
40 | def test_random_rollout():
41 | for env in [envs.make('CartPole-v0'), envs.make('FrozenLake-v0')]:
42 | agent = lambda ob: env.action_space.sample()
43 | ob = env.reset()
44 | for _ in range(10):
45 | assert env.observation_space.contains(ob)
46 | a = agent(ob)
47 | assert env.action_space.contains(a)
48 | (ob, _reward, done, _info) = env.step(a)
49 | if done: break
50 | env.close()
51 |
52 |
53 | def test_env_render_result_is_immutable():
54 | from six import string_types
55 | environs = [
56 | envs.make('Taxi-v3'),
57 | envs.make('FrozenLake-v0'),
58 | envs.make('Reverse-v0'),
59 | ]
60 |
61 | for env in environs:
62 | env.reset()
63 | output = env.render(mode='ansi')
64 | assert isinstance(output, string_types)
65 | env.close()
66 |
--------------------------------------------------------------------------------
/gym/envs/toy_text/hotter_colder.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import gym
4 | from gym import spaces
5 | from gym.utils import seeding
6 |
7 |
8 | class HotterColder(gym.Env):
9 | """Hotter Colder
10 |     The goal of hotter colder is to guess as close as possible to a randomly selected number
11 |
12 | After each step the agent receives an observation of:
13 | 0 - No guess yet submitted (only after reset)
14 | 1 - Guess is lower than the target
15 | 2 - Guess is equal to the target
16 | 3 - Guess is higher than the target
17 |
18 |     The reward is calculated as:
19 |     ((min(action, self.number) + self.bounds) / (max(action, self.number) + self.bounds)) ** 2
20 |
21 | Ideally an agent will be able to recognise the 'scent' of a higher reward and
22 |     increase the rate at which it guesses in that direction until the reward reaches
23 | its maximum
24 | """
25 | def __init__(self):
26 |         self.range = 1000  # +/- value the randomly selected number can be between
27 | self.bounds = 2000 # Action space bounds
28 |
29 | self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
30 | dtype=np.float32)
31 | self.observation_space = spaces.Discrete(4)
32 |
33 | self.number = 0
34 | self.guess_count = 0
35 | self.guess_max = 200
36 | self.observation = 0
37 |
38 | self.seed()
39 | self.reset()
40 |
41 | def seed(self, seed=None):
42 | self.np_random, seed = seeding.np_random(seed)
43 | return [seed]
44 |
45 | def step(self, action):
46 | assert self.action_space.contains(action)
47 |
48 | if action < self.number:
49 | self.observation = 1
50 |
51 | elif action == self.number:
52 | self.observation = 2
53 |
54 | elif action > self.number:
55 | self.observation = 3
56 |
57 | reward = ((min(action, self.number) + self.bounds) / (max(action, self.number) + self.bounds)) ** 2
58 |
59 | self.guess_count += 1
60 | done = self.guess_count >= self.guess_max
61 |
62 | return self.observation, reward[0], done, {"number": self.number, "guesses": self.guess_count}
63 |
64 | def reset(self):
65 | self.number = self.np_random.uniform(-self.range, self.range)
66 | self.guess_count = 0
67 | self.observation = 0
68 | return self.observation
69 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/manipulate_pen_touch_sensors.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/swimmer.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/docs/agents.md:
--------------------------------------------------------------------------------
1 | # Agents
2 |
3 | An "agent" describes the method of running an RL algorithm against an environment in the gym. The agent may contain the algorithm itself or simply provide an integration between an algorithm and the gym environments.
4 |
5 | ## RandomAgent
6 |
7 | A sample agent located in this repo at `gym/examples/agents/random_agent.py`. This simple agent leverages the environment's ability to produce a random valid action and does so for each step.
8 |
9 | ## cem.py
10 |
11 | A generic Cross-Entropy agent located in this repo at `gym/examples/agents/cem.py`. This agent defaults to 10 iterations of 25 episodes considering the top 20% "elite".
12 |
13 | ## dqn
14 |
15 | This is a very basic DQN (with experience replay) implementation, which uses OpenAI's gym environment and Keras/Theano neural networks. [/sherjilozair/dqn](https://github.com/sherjilozair/dqn)
16 |
17 | ## Simple DQN
18 |
19 | Simple, fast and easy to extend DQN implementation using [Neon](https://github.com/NervanaSystems/neon) deep learning library. Comes with out-of-box tools to train, test and visualize models. For details see [this blog post](https://www.nervanasys.com/deep-reinforcement-learning-with-neon/) or check out the [repo](https://github.com/tambetm/simple_dqn).
20 |
21 | ## AgentNet
22 | A library that allows you to develop custom deep/convolutional/recurrent reinforcement learning agent with full integration with Theano/Lasagne. Also contains a toolkit for various reinforcement learning algorithms, policies, memory augmentations, etc.
23 |
24 | - The repo's here: [AgentNet](https://github.com/yandexdataschool/AgentNet)
25 | - [A step-by-step demo for Atari SpaceInvaders ](https://github.com/yandexdataschool/AgentNet/blob/master/examples/Playing%20Atari%20with%20Deep%20Reinforcement%20Learning%20%28OpenAI%20Gym%29.ipynb)
26 |
27 | ## rllab
28 |
29 | A framework for developing and evaluating reinforcement learning algorithms, fully compatible with OpenAI Gym. It includes a wide range of continuous control tasks plus implementations of many algorithms. [/rllab/rllab](https://github.com/rllab/rllab)
30 |
31 | ## [keras-rl](https://github.com/matthiasplappert/keras-rl)
32 |
33 | [keras-rl](https://github.com/matthiasplappert/keras-rl) implements some state-of-the art deep reinforcement learning algorithms. It was built with OpenAI Gym in mind, and also built on top of the deep learning library [Keras](https://keras.io/) and utilises similar design patterns like callbacks and user-definable metrics.
34 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/manipulate_egg.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/reacher.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/manipulate_block.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/wrappers/filter_observation.py:
--------------------------------------------------------------------------------
1 | import copy
2 |
3 | from gym import spaces
4 | from gym import ObservationWrapper
5 |
6 |
7 | class FilterObservation(ObservationWrapper):
8 | """Filter dictionary observations by their keys.
9 |
10 | Args:
11 | env: The environment to wrap.
12 | filter_keys: List of keys to be included in the observations.
13 |
14 | Raises:
15 |         ValueError: If `filter_keys` is neither `None` nor an
16 |             iterable.
17 | ValueError: If any of the `filter_keys` are not included in
18 | the original `env`'s observation space
19 |
20 | """
21 | def __init__(self, env, filter_keys=None):
22 | super(FilterObservation, self).__init__(env)
23 |
24 | wrapped_observation_space = env.observation_space
25 | assert isinstance(wrapped_observation_space, spaces.Dict), (
26 | "FilterObservationWrapper is only usable with dict observations.")
27 |
28 | observation_keys = wrapped_observation_space.spaces.keys()
29 |
30 | if filter_keys is None:
31 | filter_keys = tuple(observation_keys)
32 |
33 | missing_keys = set(
34 | key for key in filter_keys if key not in observation_keys)
35 |
36 | if missing_keys:
37 | raise ValueError(
38 | "All the filter_keys must be included in the "
39 |                 "original observation space.\n"
40 | "Filter keys: {filter_keys}\n"
41 | "Observation keys: {observation_keys}\n"
42 | "Missing keys: {missing_keys}".format(
43 | filter_keys=filter_keys,
44 | observation_keys=observation_keys,
45 | missing_keys=missing_keys,
46 | ))
47 |
48 | self.observation_space = type(wrapped_observation_space)([
49 | (name, copy.deepcopy(space))
50 | for name, space in wrapped_observation_space.spaces.items()
51 | if name in filter_keys
52 | ])
53 |
54 | self._env = env
55 | self._filter_keys = tuple(filter_keys)
56 |
57 | def observation(self, observation):
58 | filter_observation = self._filter_observation(observation)
59 | return filter_observation
60 |
61 | def _filter_observation(self, observation):
62 | observation = type(observation)([
63 | (name, value)
64 | for name, value in observation.items()
65 | if name in self._filter_keys
66 | ])
67 | return observation
68 |
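A brief sketch of how `FilterObservation` is used on an environment with a `Dict` observation space; `FetchReach-v1` is only an illustrative goal-based env and requires the `mujoco` extra:

```python
import gym
from gym.wrappers.filter_observation import FilterObservation

env = gym.make('FetchReach-v1')   # observations are a Dict with several keys
env = FilterObservation(env, filter_keys=['observation', 'desired_goal'])
obs = env.reset()                 # only the two kept keys remain
```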
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/manipulate_egg_touch_sensors.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/vector/tests/test_sync_vector_env.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from gym.spaces import Box
5 | from gym.vector.tests.utils import make_env
6 |
7 | from gym.vector.sync_vector_env import SyncVectorEnv
8 |
9 | def test_create_sync_vector_env():
10 | env_fns = [make_env('CubeCrash-v0', i) for i in range(8)]
11 | try:
12 | env = SyncVectorEnv(env_fns)
13 | finally:
14 | env.close()
15 |
16 | assert env.num_envs == 8
17 |
18 |
19 | def test_reset_sync_vector_env():
20 | env_fns = [make_env('CubeCrash-v0', i) for i in range(8)]
21 | try:
22 | env = SyncVectorEnv(env_fns)
23 | observations = env.reset()
24 | finally:
25 | env.close()
26 |
27 | assert isinstance(env.observation_space, Box)
28 | assert isinstance(observations, np.ndarray)
29 | assert observations.dtype == env.observation_space.dtype
30 | assert observations.shape == (8,) + env.single_observation_space.shape
31 | assert observations.shape == env.observation_space.shape
32 |
33 |
34 | @pytest.mark.parametrize('use_single_action_space', [True, False])
35 | def test_step_sync_vector_env(use_single_action_space):
36 | env_fns = [make_env('CubeCrash-v0', i) for i in range(8)]
37 | try:
38 | env = SyncVectorEnv(env_fns)
39 | observations = env.reset()
40 | if use_single_action_space:
41 | actions = [env.single_action_space.sample() for _ in range(8)]
42 | else:
43 | actions = env.action_space.sample()
44 | observations, rewards, dones, _ = env.step(actions)
45 | finally:
46 | env.close()
47 |
48 | assert isinstance(env.observation_space, Box)
49 | assert isinstance(observations, np.ndarray)
50 | assert observations.dtype == env.observation_space.dtype
51 | assert observations.shape == (8,) + env.single_observation_space.shape
52 | assert observations.shape == env.observation_space.shape
53 |
54 | assert isinstance(rewards, np.ndarray)
55 | assert isinstance(rewards[0], (float, np.floating))
56 | assert rewards.ndim == 1
57 | assert rewards.size == 8
58 |
59 | assert isinstance(dones, np.ndarray)
60 | assert dones.dtype == np.bool_
61 | assert dones.ndim == 1
62 | assert dones.size == 8
63 |
64 |
65 | def test_check_observations_sync_vector_env():
66 | # CubeCrash-v0 - observation_space: Box(40, 32, 3)
67 | env_fns = [make_env('CubeCrash-v0', i) for i in range(8)]
68 | # MemorizeDigits-v0 - observation_space: Box(24, 32, 3)
69 | env_fns[1] = make_env('MemorizeDigits-v0', 1)
70 | with pytest.raises(RuntimeError):
71 | env = SyncVectorEnv(env_fns)
72 | env.close()
73 |
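
Note: the tests above pin down the SyncVectorEnv contract (batched observation shapes, reward/done arrays). As a minimal usage sketch outside the test harness — assuming a standard `CartPole-v1` environment is registered — the same reset/step/close cycle looks like this:

```
import gym
from gym.vector.sync_vector_env import SyncVectorEnv

# Each entry is a callable that lazily builds one sub-environment.
env_fns = [lambda: gym.make('CartPole-v1') for _ in range(4)]
env = SyncVectorEnv(env_fns)

observations = env.reset()   # shape: (4,) + env.single_observation_space.shape
for _ in range(10):
    # Sampling from the batched action space yields one action per sub-environment.
    actions = env.action_space.sample()
    observations, rewards, dones, infos = env.step(actions)
env.close()
```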
--------------------------------------------------------------------------------
/gym/vector/utils/spaces.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import OrderedDict
3 |
4 | from gym.spaces import Box, Discrete, MultiDiscrete, MultiBinary, Tuple, Dict
5 |
6 | _BaseGymSpaces = (Box, Discrete, MultiDiscrete, MultiBinary)
7 | __all__ = ['_BaseGymSpaces', 'batch_space']
8 |
9 | def batch_space(space, n=1):
10 | """Create a (batched) space, containing multiple copies of a single space.
11 |
12 | Parameters
13 | ----------
14 | space : `gym.spaces.Space` instance
15 | Space (e.g. the observation space) for a single environment in the
16 | vectorized environment.
17 |
18 | n : int
19 | Number of environments in the vectorized environment.
20 |
21 | Returns
22 | -------
23 | batched_space : `gym.spaces.Space` instance
24 | Space (e.g. the observation space) for a batch of environments in the
25 | vectorized environment.
26 |
27 | Example
28 | -------
29 | >>> from gym.spaces import Box, Dict
30 | >>> space = Dict({
31 | ... 'position': Box(low=0, high=1, shape=(3,), dtype=np.float32),
32 | ... 'velocity': Box(low=0, high=1, shape=(2,), dtype=np.float32)})
33 | >>> batch_space(space, n=5)
34 | Dict(position:Box(5, 3), velocity:Box(5, 2))
35 | """
36 | if isinstance(space, _BaseGymSpaces):
37 | return batch_space_base(space, n=n)
38 | elif isinstance(space, Tuple):
39 | return batch_space_tuple(space, n=n)
40 | elif isinstance(space, Dict):
41 | return batch_space_dict(space, n=n)
42 | else:
43 | raise NotImplementedError()
44 |
45 | def batch_space_base(space, n=1):
46 | if isinstance(space, Box):
47 | repeats = tuple([n] + [1] * space.low.ndim)
48 | low, high = np.tile(space.low, repeats), np.tile(space.high, repeats)
49 | return Box(low=low, high=high, dtype=space.dtype)
50 |
51 | elif isinstance(space, Discrete):
52 | return MultiDiscrete(np.full((n,), space.n, dtype=space.dtype))
53 |
54 | elif isinstance(space, MultiDiscrete):
55 | repeats = tuple([n] + [1] * space.nvec.ndim)
56 | high = np.tile(space.nvec, repeats) - 1
57 | return Box(low=np.zeros_like(high), high=high, dtype=space.dtype)
58 |
59 | elif isinstance(space, MultiBinary):
60 | return Box(low=0, high=1, shape=(n,) + space.shape, dtype=space.dtype)
61 |
62 | else:
63 | raise NotImplementedError()
64 |
65 | def batch_space_tuple(space, n=1):
66 | return Tuple(tuple(batch_space(subspace, n=n) for subspace in space.spaces))
67 |
68 | def batch_space_dict(space, n=1):
69 | return Dict(OrderedDict([(key, batch_space(subspace, n=n))
70 | for (key, subspace) in space.spaces.items()]))
71 |
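
Note: a small sketch of the dispatch rules in `batch_space_base` above (the printed shapes/reprs are indicative of what the rules produce, not exact output):

```
import numpy as np
from gym.spaces import Box, Discrete
from gym.vector.utils.spaces import batch_space

# Box(3,) batched over 4 environments: low/high are tiled along a new
# leading axis, giving a Box with shape (4, 3).
box = Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)
print(batch_space(box, n=4).shape)      # (4, 3)

# Discrete(5) batched over 4 environments becomes MultiDiscrete([5 5 5 5]).
print(batch_space(Discrete(5), n=4))
```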
--------------------------------------------------------------------------------
/examples/agents/keyboard_agent.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 |
4 | import sys, gym, time
5 |
6 | #
7 | # Test yourself as a learning agent! Pass environment name as a command-line argument, for example:
8 | #
9 | # python keyboard_agent.py SpaceInvadersNoFrameskip-v4
10 | #
11 |
12 | env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1])
13 |
14 | if not hasattr(env.action_space, 'n'):
15 | raise Exception('Keyboard agent only supports discrete action spaces')
16 | ACTIONS = env.action_space.n
17 | SKIP_CONTROL = 0 # Reuse the previous control decision SKIP_CONTROL times;
18 | # a quick way to test how much frame skip is still playable.
19 |
20 | human_agent_action = 0
21 | human_wants_restart = False
22 | human_sets_pause = False
23 |
24 | def key_press(key, mod):
25 | global human_agent_action, human_wants_restart, human_sets_pause
26 | if key==0xff0d: human_wants_restart = True
27 | if key==32: human_sets_pause = not human_sets_pause
28 | a = int( key - ord('0') )
29 | if a <= 0 or a >= ACTIONS: return
30 | human_agent_action = a
31 |
32 | def key_release(key, mod):
33 | global human_agent_action
34 | a = int( key - ord('0') )
35 | if a <= 0 or a >= ACTIONS: return
36 | if human_agent_action == a:
37 | human_agent_action = 0
38 |
39 | env.render()
40 | env.unwrapped.viewer.window.on_key_press = key_press
41 | env.unwrapped.viewer.window.on_key_release = key_release
42 |
43 | def rollout(env):
44 | global human_agent_action, human_wants_restart, human_sets_pause
45 | human_wants_restart = False
46 | obser = env.reset()
47 | skip = 0
48 | total_reward = 0
49 | total_timesteps = 0
50 | while 1:
51 | if not skip:
52 | #print("taking action {}".format(human_agent_action))
53 | a = human_agent_action
54 | total_timesteps += 1
55 | skip = SKIP_CONTROL
56 | else:
57 | skip -= 1
58 |
59 | obser, r, done, info = env.step(a)
60 | if r != 0:
61 | print("reward %0.3f" % r)
62 | total_reward += r
63 | window_still_open = env.render()
64 | if window_still_open==False: return False
65 | if done: break
66 | if human_wants_restart: break
67 | while human_sets_pause:
68 | env.render()
69 | time.sleep(0.1)
70 | time.sleep(0.1)
71 | print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))
72 |
73 | print("ACTIONS={}".format(ACTIONS))
74 | print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
75 | print("No keys pressed is taking action 0")
76 |
77 | while 1:
78 | window_still_open = rollout(env)
79 | if window_still_open==False: break
80 |
81 |
--------------------------------------------------------------------------------
/gym/spaces/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym.spaces import Box
4 | from gym.spaces import Discrete
5 | from gym.spaces import MultiDiscrete
6 | from gym.spaces import MultiBinary
7 | from gym.spaces import Tuple
8 | from gym.spaces import Dict
9 |
10 |
11 | def flatdim(space):
12 | if isinstance(space, Box):
13 | return int(np.prod(space.shape))
14 | elif isinstance(space, Discrete):
15 | return int(space.n)
16 | elif isinstance(space, Tuple):
17 | return int(sum([flatdim(s) for s in space.spaces]))
18 | elif isinstance(space, Dict):
19 | return int(sum([flatdim(s) for s in space.spaces.values()]))
20 | elif isinstance(space, MultiBinary):
21 | return int(space.n)
22 | elif isinstance(space, MultiDiscrete):
23 | return int(np.prod(space.shape))
24 | else:
25 | raise NotImplementedError
26 |
27 |
28 | def flatten(space, x):
29 | if isinstance(space, Box):
30 | return np.asarray(x, dtype=np.float32).flatten()
31 | elif isinstance(space, Discrete):
32 | onehot = np.zeros(space.n, dtype=np.float32)
33 | onehot[x] = 1.0
34 | return onehot
35 | elif isinstance(space, Tuple):
36 | return np.concatenate([flatten(s, x_part) for x_part, s in zip(x, space.spaces)])
37 | elif isinstance(space, Dict):
38 | return np.concatenate([flatten(s, x[key]) for key, s in space.spaces.items()])
39 | elif isinstance(space, MultiBinary):
40 | return np.asarray(x).flatten()
41 | elif isinstance(space, MultiDiscrete):
42 | return np.asarray(x).flatten()
43 | else:
44 | raise NotImplementedError
45 |
46 |
47 | def unflatten(space, x):
48 | if isinstance(space, Box):
49 | return np.asarray(x, dtype=np.float32).reshape(space.shape)
50 | elif isinstance(space, Discrete):
51 | return int(np.nonzero(x)[0][0])
52 | elif isinstance(space, Tuple):
53 | dims = [flatdim(s) for s in space.spaces]
54 | list_flattened = np.split(x, np.cumsum(dims)[:-1])
55 | list_unflattened = [unflatten(s, flattened)
56 | for flattened, s in zip(list_flattened, space.spaces)]
57 | return tuple(list_unflattened)
58 | elif isinstance(space, Dict):
59 | dims = [flatdim(s) for s in space.spaces.values()]
60 | list_flattened = np.split(x, np.cumsum(dims)[:-1])
61 | list_unflattened = [(key, unflatten(s, flattened))
62 | for flattened, (key, s) in zip(list_flattened, space.spaces.items())]
63 | return dict(list_unflattened)
64 | elif isinstance(space, MultiBinary):
65 | return np.asarray(x).reshape(space.shape)
66 | elif isinstance(space, MultiDiscrete):
67 | return np.asarray(x).reshape(space.shape)
68 | else:
69 | raise NotImplementedError
70 |
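
Note: these helpers carry no docstrings; as a rough sketch of the round trip they implement (flatten a structured sample into a single float vector, then rebuild it):

```
import numpy as np
from gym.spaces import Box, Discrete, Tuple
from gym.spaces.utils import flatdim, flatten, unflatten

space = Tuple((Box(low=0, high=1, shape=(2,), dtype=np.float32), Discrete(3)))
print(flatdim(space))            # 5: two Box entries plus a 3-way one-hot

sample = space.sample()
flat = flatten(space, sample)    # 1-D float32 vector of length 5
restored = unflatten(space, flat)
assert restored[1] == sample[1]  # the Discrete component survives the round trip
```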
--------------------------------------------------------------------------------
/gym/envs/mujoco/striker.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | utils.EzPickle.__init__(self)
8 | self._striked = False
9 | self._min_strike_dist = np.inf
10 | self.strike_threshold = 0.1
11 | mujoco_env.MujocoEnv.__init__(self, 'striker.xml', 5)
12 |
13 | def step(self, a):
14 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
15 | vec_2 = self.get_body_com("object") - self.get_body_com("goal")
16 | self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(vec_2))
17 |
18 | if np.linalg.norm(vec_1) < self.strike_threshold:
19 | self._striked = True
20 | self._strike_pos = self.get_body_com("tips_arm")
21 |
22 | if self._striked:
23 | vec_3 = self.get_body_com("object") - self._strike_pos
24 | reward_near = - np.linalg.norm(vec_3)
25 | else:
26 | reward_near = - np.linalg.norm(vec_1)
27 |
28 | reward_dist = - np.linalg.norm(self._min_strike_dist)
29 | reward_ctrl = - np.square(a).sum()
30 | reward = 3 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
31 |
32 | self.do_simulation(a, self.frame_skip)
33 | ob = self._get_obs()
34 | done = False
35 | return ob, reward, done, dict(reward_dist=reward_dist,
36 | reward_ctrl=reward_ctrl)
37 |
38 | def viewer_setup(self):
39 | self.viewer.cam.trackbodyid = 0
40 | self.viewer.cam.distance = 4.0
41 |
42 | def reset_model(self):
43 | self._min_strike_dist = np.inf
44 | self._striked = False
45 | self._strike_pos = None
46 |
47 | qpos = self.init_qpos
48 |
49 | self.ball = np.array([0.5, -0.175])
50 | while True:
51 | self.goal = np.concatenate([
52 | self.np_random.uniform(low=0.15, high=0.7, size=1),
53 | self.np_random.uniform(low=0.1, high=1.0, size=1)])
54 | if np.linalg.norm(self.ball - self.goal) > 0.17:
55 | break
56 |
57 | qpos[-9:-7] = [self.ball[1], self.ball[0]]
58 | qpos[-7:-5] = self.goal
59 | diff = self.ball - self.goal
60 | angle = -np.arctan(diff[0] / (diff[1] + 1e-8))
61 | qpos[-1] = angle / 3.14
62 | qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1,
63 | size=self.model.nv)
64 | qvel[7:] = 0
65 | self.set_state(qpos, qvel)
66 | return self._get_obs()
67 |
68 | def _get_obs(self):
69 | return np.concatenate([
70 | self.sim.data.qpos.flat[:7],
71 | self.sim.data.qvel.flat[:7],
72 | self.get_body_com("tips_arm"),
73 | self.get_body_com("object"),
74 | self.get_body_com("goal"),
75 | ])
76 |
--------------------------------------------------------------------------------
/gym/envs/robotics/README.md:
--------------------------------------------------------------------------------
1 | # Robotics environments
2 |
3 | Details and documentation on these robotics environments are available in our [blog post](https://blog.openai.com/ingredients-for-robotics-research/), the accompanying [technical report](https://arxiv.org/abs/1802.09464), and the [Gym website](https://gym.openai.com/envs/#robotics).
4 |
5 | If you use these environments, please cite the following paper:
6 |
7 | ```
8 | @misc{1802.09464,
9 | Author = {Matthias Plappert and Marcin Andrychowicz and Alex Ray and Bob McGrew and Bowen Baker and Glenn Powell and Jonas Schneider and Josh Tobin and Maciek Chociej and Peter Welinder and Vikash Kumar and Wojciech Zaremba},
10 | Title = {Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research},
11 | Year = {2018},
12 | Eprint = {arXiv:1802.09464},
13 | }
14 | ```
15 |
16 | ## Fetch environments
17 |
18 |
19 | [FetchReach-v0](https://gym.openai.com/envs/FetchReach-v0/): Fetch has to move its end-effector to the desired goal position.
20 |
21 |
22 |
23 |
24 | [FetchSlide-v0](https://gym.openai.com/envs/FetchSlide-v0/): Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
25 |
26 |
27 |
28 |
29 | [FetchPush-v0](https://gym.openai.com/envs/FetchPush-v0/): Fetch has to move a box by pushing it until it reaches a desired goal position.
30 |
31 |
32 |
33 |
34 | [FetchPickAndPlace-v0](https://gym.openai.com/envs/FetchPickAndPlace-v0/): Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
35 |
36 | ## Shadow Dexterous Hand environments
37 |
38 |
39 | [HandReach-v0](https://gym.openai.com/envs/HandReach-v0/): ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
40 |
41 |
42 |
43 |
44 | [HandManipulateBlock-v0](https://gym.openai.com/envs/HandManipulateBlock-v0/): ShadowHand has to manipulate a block until it achieves a desired goal position and rotation.
45 |
46 |
47 |
48 |
49 | [HandManipulateEgg-v0](https://gym.openai.com/envs/HandManipulateEgg-v0/): ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation.
50 |
51 |
52 |
53 |
54 | [HandManipulatePen-v0](https://gym.openai.com/envs/HandManipulatePen-v0/): ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation.
55 |
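All of these environments are goal-based: `reset()` and `step()` return dictionary observations with `observation`, `achieved_goal`, and `desired_goal` entries, and the reward can be recomputed for a substituted goal via `compute_reward`, which is what makes Hindsight Experience Replay possible. A minimal usage sketch (it assumes a working MuJoCo / `mujoco-py` setup; the `-v0` IDs above may appear as `-v1` in later Gym releases):

```
import gym

env = gym.make('FetchReach-v0')  # may be 'FetchReach-v1' depending on the Gym version
obs = env.reset()
print(obs['observation'].shape, obs['achieved_goal'], obs['desired_goal'])

obs, reward, done, info = env.step(env.action_space.sample())

# Recompute the reward for the goal actually achieved on this step.
recomputed = env.compute_reward(obs['achieved_goal'], obs['desired_goal'], info)
```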
--------------------------------------------------------------------------------
/gym/envs/tests/test_determinism.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | from gym.envs.tests.spec_list import spec_list
5 |
6 | @pytest.mark.parametrize("spec", spec_list)
7 | def test_env(spec):
8 | # Note that this precludes running this test in multiple
9 | # threads. However, we probably already can't do multithreading
10 | # due to some environments.
11 | env1 = spec.make()
12 | env1.seed(0)
13 | initial_observation1 = env1.reset()
14 | env1.action_space.seed(0)
15 | action_samples1 = [env1.action_space.sample() for i in range(4)]
16 | step_responses1 = [env1.step(action) for action in action_samples1]
17 | env1.close()
18 |
19 | env2 = spec.make()
20 | env2.seed(0)
21 | initial_observation2 = env2.reset()
22 | env2.action_space.seed(0)
23 | action_samples2 = [env2.action_space.sample() for i in range(4)]
24 | step_responses2 = [env2.step(action) for action in action_samples2]
25 | env2.close()
26 |
27 | for i, (action_sample1, action_sample2) in enumerate(zip(action_samples1, action_samples2)):
28 | try:
29 | assert_equals(action_sample1, action_sample2)
30 | except AssertionError:
31 | print('env1.action_space=', env1.action_space)
32 | print('env2.action_space=', env2.action_space)
33 | print('action_samples1=', action_samples1)
34 | print('action_samples2=', action_samples2)
35 | print('[{}] action_sample1: {}, action_sample2: {}'.format(i, action_sample1, action_sample2))
36 | raise
37 |
38 | # Don't check rollout equality if it's a nondeterministic
39 | # environment.
40 | if spec.nondeterministic:
41 | return
42 |
43 | assert_equals(initial_observation1, initial_observation2)
44 |
45 | for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(zip(step_responses1, step_responses2)):
46 | assert_equals(o1, o2, '[{}] '.format(i))
47 | assert r1 == r2, '[{}] r1: {}, r2: {}'.format(i, r1, r2)
48 | assert d1 == d2, '[{}] d1: {}, d2: {}'.format(i, d1, d2)
49 |
50 | # Go returns a Pachi game board in info, which doesn't
51 | # properly check equality. For now, we hack around this by
52 | # just skipping Go.
53 | if spec.id not in ['Go9x9-v0', 'Go19x19-v0']:
54 | assert_equals(i1, i2, '[{}] '.format(i))
55 |
56 | def assert_equals(a, b, prefix=None):
57 | assert type(a) == type(b), "{}Differing types: {} and {}".format(prefix, a, b)
58 | if isinstance(a, dict):
59 | assert list(a.keys()) == list(b.keys()), "{}Key sets differ: {} and {}".format(prefix, a, b)
60 |
61 | for k in a.keys():
62 | v_a = a[k]
63 | v_b = b[k]
64 | assert_equals(v_a, v_b)
65 | elif isinstance(a, np.ndarray):
66 | np.testing.assert_array_equal(a, b)
67 | elif isinstance(a, tuple):
68 | for elem_from_a, elem_from_b in zip(a, b):
69 | assert_equals(elem_from_a, elem_from_b)
70 | else:
71 | assert a == b
72 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/half_cheetah_v3.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 |
6 | DEFAULT_CAMERA_CONFIG = {
7 | 'distance': 4.0,
8 | }
9 |
10 |
11 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
12 | def __init__(self,
13 | xml_file='half_cheetah.xml',
14 | forward_reward_weight=1.0,
15 | ctrl_cost_weight=0.1,
16 | reset_noise_scale=0.1,
17 | exclude_current_positions_from_observation=True):
18 | utils.EzPickle.__init__(**locals())
19 |
20 | self._forward_reward_weight = forward_reward_weight
21 |
22 | self._ctrl_cost_weight = ctrl_cost_weight
23 |
24 | self._reset_noise_scale = reset_noise_scale
25 |
26 | self._exclude_current_positions_from_observation = (
27 | exclude_current_positions_from_observation)
28 |
29 | mujoco_env.MujocoEnv.__init__(self, xml_file, 5)
30 |
31 | def control_cost(self, action):
32 | control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
33 | return control_cost
34 |
35 | def step(self, action):
36 | x_position_before = self.sim.data.qpos[0]
37 | self.do_simulation(action, self.frame_skip)
38 | x_position_after = self.sim.data.qpos[0]
39 | x_velocity = ((x_position_after - x_position_before)
40 | / self.dt)
41 |
42 | ctrl_cost = self.control_cost(action)
43 |
44 | forward_reward = self._forward_reward_weight * x_velocity
45 |
46 | observation = self._get_obs()
47 | reward = forward_reward - ctrl_cost
48 | done = False
49 | info = {
50 | 'x_position': x_position_after,
51 | 'x_velocity': x_velocity,
52 |
53 | 'reward_run': forward_reward,
54 | 'reward_ctrl': -ctrl_cost
55 | }
56 |
57 | return observation, reward, done, info
58 |
59 | def _get_obs(self):
60 | position = self.sim.data.qpos.flat.copy()
61 | velocity = self.sim.data.qvel.flat.copy()
62 |
63 | if self._exclude_current_positions_from_observation:
64 | position = position[1:]
65 |
66 | observation = np.concatenate((position, velocity)).ravel()
67 | return observation
68 |
69 | def reset_model(self):
70 | noise_low = -self._reset_noise_scale
71 | noise_high = self._reset_noise_scale
72 |
73 | qpos = self.init_qpos + self.np_random.uniform(
74 | low=noise_low, high=noise_high, size=self.model.nq)
75 | qvel = self.init_qvel + self._reset_noise_scale * self.np_random.randn(
76 | self.model.nv)
77 |
78 | self.set_state(qpos, qvel)
79 |
80 | observation = self._get_obs()
81 | return observation
82 |
83 | def viewer_setup(self):
84 | for key, value in DEFAULT_CAMERA_CONFIG.items():
85 | if isinstance(value, np.ndarray):
86 | getattr(self.viewer.cam, key)[:] = value
87 | else:
88 | setattr(self.viewer.cam, key, value)
89 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/hopper.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/tests/test_mujoco_v2_to_v3_conversion.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import numpy as np
3 | from gym import envs
4 | from gym.envs.tests.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE
5 |
6 |
7 | def verify_environments_match(old_environment_id,
8 | new_environment_id,
9 | seed=1,
10 | num_actions=1000):
11 | old_environment = envs.make(old_environment_id)
12 | new_environment = envs.make(new_environment_id)
13 |
14 | old_environment.seed(seed)
15 | new_environment.seed(seed)
16 |
17 | old_reset_observation = old_environment.reset()
18 | new_reset_observation = new_environment.reset()
19 |
20 | np.testing.assert_allclose(old_reset_observation, new_reset_observation)
21 |
22 | for i in range(num_actions):
23 | action = old_environment.action_space.sample()
24 | old_observation, old_reward, old_done, old_info = old_environment.step(
25 | action)
26 | new_observation, new_reward, new_done, new_info = new_environment.step(
27 | action)
28 |
29 | eps = 1e-6
30 | np.testing.assert_allclose(old_observation, new_observation, atol=eps)
31 | np.testing.assert_allclose(old_reward, new_reward, atol=eps)
32 | np.testing.assert_allclose(old_done, new_done, atol=eps)
33 |
34 | for key in old_info:
35 | np.testing.assert_allclose(old_info[key], new_info[key], atol=eps)
36 |
37 |
38 | @unittest.skipIf(skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE)
39 | class Mujocov2Tov3ConversionTest(unittest.TestCase):
40 | def test_environments_match(self):
41 | test_cases = (
42 | {
43 | 'old_id': 'Swimmer-v2',
44 | 'new_id': 'Swimmer-v3'
45 | },
46 | {
47 | 'old_id': 'Hopper-v2',
48 | 'new_id': 'Hopper-v3'
49 | },
50 | {
51 | 'old_id': 'Walker2d-v2',
52 | 'new_id': 'Walker2d-v3'
53 | },
54 | {
55 | 'old_id': 'HalfCheetah-v2',
56 | 'new_id': 'HalfCheetah-v3'
57 | },
58 | {
59 | 'old_id': 'Ant-v2',
60 | 'new_id': 'Ant-v3'
61 | },
62 | {
63 | 'old_id': 'Humanoid-v2',
64 | 'new_id': 'Humanoid-v3'
65 | },
66 | )
67 |
68 | for test_case in test_cases:
69 | verify_environments_match(test_case['old_id'], test_case['new_id'])
70 |
71 | # Raises KeyError because the new envs have extra info
72 | with self.assertRaises(KeyError):
73 | verify_environments_match('Swimmer-v3', 'Swimmer-v2')
74 |
75 | # Raises KeyError because the new envs have extra info
76 | with self.assertRaises(KeyError):
77 | verify_environments_match('Humanoid-v3', 'Humanoid-v2')
78 |
82 |
83 |
84 | if __name__ == '__main__':
85 | unittest.main()
86 |
--------------------------------------------------------------------------------
/gym/wrappers/test_filter_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | import gym
5 | from gym import spaces
6 | from gym.wrappers.filter_observation import FilterObservation
7 |
8 |
9 | class FakeEnvironment(gym.Env):
10 | def __init__(self, observation_keys=('state',)):
11 | self.observation_space = spaces.Dict({
12 | name: spaces.Box(shape=(2, ), low=-1, high=1, dtype=np.float32)
13 | for name in observation_keys
14 | })
15 | self.action_space = spaces.Box(
16 | shape=(1, ), low=-1, high=1, dtype=np.float32)
17 |
18 | def render(self, width=32, height=32, *args, **kwargs):
19 | del args
20 | del kwargs
21 | image_shape = (height, width, 3)
22 | return np.zeros(image_shape, dtype=np.uint8)
23 |
24 | def reset(self):
25 | observation = self.observation_space.sample()
26 | return observation
27 |
28 | def step(self, action):
29 | del action
30 | observation = self.observation_space.sample()
31 | reward, terminal, info = 0.0, False, {}
32 | return observation, reward, terminal, info
33 |
34 |
35 | FILTER_OBSERVATION_TEST_CASES = (
36 | (('key1', 'key2'), ('key1', )),
37 | (('key1', 'key2'), ('key1', 'key2')),
38 | (('key1', ), None),
39 | (('key1', ), ('key1', )),
40 | )
41 |
42 | ERROR_TEST_CASES = (
43 | ('key', ValueError, "All the filter_keys must be included..*"),
44 | (False, TypeError, "'bool' object is not iterable"),
45 | (1, TypeError, "'int' object is not iterable"),
46 | )
47 |
48 |
49 | class TestFilterObservation(object):
50 | @pytest.mark.parametrize("observation_keys,filter_keys",
51 | FILTER_OBSERVATION_TEST_CASES)
52 | def test_filter_observation(self, observation_keys, filter_keys):
53 | env = FakeEnvironment(observation_keys=observation_keys)
54 |
55 | # Make sure we are testing the right environment for the test.
56 | observation_space = env.observation_space
57 | assert isinstance(observation_space, spaces.Dict)
58 |
59 | wrapped_env = FilterObservation(env, filter_keys=filter_keys)
60 |
61 | assert isinstance(wrapped_env.observation_space, spaces.Dict)
62 |
63 | if filter_keys is None:
64 | filter_keys = tuple(observation_keys)
65 |
66 | assert len(wrapped_env.observation_space.spaces) == len(filter_keys)
67 | assert (tuple(wrapped_env.observation_space.spaces.keys())
68 | == tuple(filter_keys))
69 |
70 | # Check that the added space item is consistent with the added observation.
71 | observation = wrapped_env.reset()
72 | assert (len(observation) == len(filter_keys))
73 |
74 | @pytest.mark.parametrize("filter_keys,error_type,error_match",
75 | ERROR_TEST_CASES)
76 | def test_raises_with_incorrect_arguments(self,
77 | filter_keys,
78 | error_type,
79 | error_match):
80 | env = FakeEnvironment(observation_keys=('key1', 'key2'))
81 |
82 |
83 |
84 | with pytest.raises(error_type, match=error_match):
85 | FilterObservation(env, filter_keys=filter_keys)
86 |
--------------------------------------------------------------------------------