├── tests ├── __init__.py └── gym │ ├── __init__.py │ ├── envs │ ├── __init__.py │ └── robotics │ │ ├── __init__.py │ │ └── hand │ │ ├── __init__.py │ │ ├── test_reach.py │ │ ├── test_manipulate.py │ │ └── test_manipulate_touch_sensors.py │ └── wrappers │ └── __init__.py ├── .dockerignore ├── gym ├── envs │ ├── tests │ │ ├── __init__.py │ │ ├── rollout.json │ │ ├── test_kellycoinflip.py │ │ ├── test_frozenlake_dfs.py │ │ ├── spec_list.py │ │ ├── test_registration.py │ │ ├── test_envs.py │ │ ├── test_determinism.py │ │ └── test_mujoco_v2_to_v3_conversion.py │ ├── algorithmic │ │ ├── tests │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── copy_.py │ │ ├── reverse.py │ │ ├── repeat_copy.py │ │ ├── duplicated_input.py │ │ └── reversed_addition.py │ ├── robotics │ │ ├── assets │ │ │ ├── stls │ │ │ │ ├── .get │ │ │ │ ├── hand │ │ │ │ │ ├── F1.stl │ │ │ │ │ ├── F2.stl │ │ │ │ │ ├── F3.stl │ │ │ │ │ ├── TH1_z.stl │ │ │ │ │ ├── TH2_z.stl │ │ │ │ │ ├── TH3_z.stl │ │ │ │ │ ├── palm.stl │ │ │ │ │ ├── wrist.stl │ │ │ │ │ ├── knuckle.stl │ │ │ │ │ ├── lfmetacarpal.stl │ │ │ │ │ ├── forearm_electric.stl │ │ │ │ │ └── forearm_electric_cvx.stl │ │ │ │ └── fetch │ │ │ │ │ ├── estop_link.stl │ │ │ │ │ ├── gripper_link.stl │ │ │ │ │ ├── laser_link.stl │ │ │ │ │ ├── torso_fixed_link.stl │ │ │ │ │ ├── base_link_collision.stl │ │ │ │ │ ├── bellows_link_collision.stl │ │ │ │ │ ├── l_wheel_link_collision.stl │ │ │ │ │ ├── r_wheel_link_collision.stl │ │ │ │ │ ├── head_pan_link_collision.stl │ │ │ │ │ ├── head_tilt_link_collision.stl │ │ │ │ │ ├── elbow_flex_link_collision.stl │ │ │ │ │ ├── forearm_roll_link_collision.stl │ │ │ │ │ ├── shoulder_pan_link_collision.stl │ │ │ │ │ ├── torso_lift_link_collision.stl │ │ │ │ │ ├── wrist_flex_link_collision.stl │ │ │ │ │ ├── wrist_roll_link_collision.stl │ │ │ │ │ ├── shoulder_lift_link_collision.stl │ │ │ │ │ └── upperarm_roll_link_collision.stl │ │ │ ├── textures │ │ │ │ ├── block.png │ │ │ │ └── block_hidden.png │ │ │ ├── fetch │ │ │ │ ├── reach.xml │ │ │ │ ├── push.xml │ │ │ │ ├── slide.xml │ │ │ │ └── pick_and_place.xml │ │ │ └── hand │ │ │ │ ├── reach.xml │ │ │ │ ├── shared_asset.xml │ │ │ │ ├── manipulate_pen.xml │ │ │ │ ├── manipulate_pen_touch_sensors.xml │ │ │ │ ├── manipulate_egg.xml │ │ │ │ ├── manipulate_block.xml │ │ │ │ ├── manipulate_egg_touch_sensors.xml │ │ │ │ └── manipulate_block_touch_sensors.xml │ │ ├── fetch │ │ │ ├── __init__.py │ │ │ ├── reach.py │ │ │ ├── push.py │ │ │ ├── pick_and_place.py │ │ │ └── slide.py │ │ ├── hand │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── hand_env.py │ │ └── README.md │ ├── atari │ │ └── __init__.py │ ├── classic_control │ │ ├── assets │ │ │ └── clockwise.png │ │ └── __init__.py │ ├── unittest │ │ └── __init__.py │ ├── box2d │ │ ├── __init__.py │ │ └── test_lunar_lander.py │ ├── README.md │ ├── toy_text │ │ ├── __init__.py │ │ ├── roulette.py │ │ ├── discrete.py │ │ ├── nchain.py │ │ └── hotter_colder.py │ └── mujoco │ │ ├── __init__.py │ │ ├── inverted_pendulum.py │ │ ├── swimmer.py │ │ ├── half_cheetah.py │ │ ├── assets │ │ ├── inverted_pendulum.xml │ │ ├── point.xml │ │ ├── inverted_double_pendulum.xml │ │ ├── swimmer.xml │ │ ├── reacher.xml │ │ └── hopper.xml │ │ ├── walker2d.py │ │ ├── hopper.py │ │ ├── inverted_double_pendulum.py │ │ ├── reacher.py │ │ ├── ant.py │ │ ├── humanoidstandup.py │ │ ├── pusher.py │ │ ├── humanoid.py │ │ ├── thrower.py │ │ ├── striker.py │ │ └── half_cheetah_v3.py ├── spaces │ ├── tests │ │ └── __init__.py │ ├── __init__.py │ ├── multi_binary.py │ ├── discrete.py │ ├── space.py │ ├── 
tuple.py │ ├── multi_discrete.py │ └── utils.py ├── vector │ ├── tests │ │ ├── __init__.py │ │ ├── test_vector_env.py │ │ ├── test_spaces.py │ │ ├── utils.py │ │ └── test_sync_vector_env.py │ ├── utils │ │ ├── __init__.py │ │ ├── misc.py │ │ └── spaces.py │ └── __init__.py ├── wrappers │ ├── tests │ │ └── __init__.py │ ├── monitoring │ │ ├── __init__.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── helpers.py │ │ │ └── test_video_recorder.py │ ├── clip_action.py │ ├── flatten_observation.py │ ├── test_clip_action.py │ ├── transform_reward.py │ ├── transform_observation.py │ ├── __init__.py │ ├── test_rescale_action.py │ ├── test_record_episode_statistics.py │ ├── test_resize_observation.py │ ├── test_transform_observation.py │ ├── resize_observation.py │ ├── time_limit.py │ ├── gray_scale_observation.py │ ├── README.md │ ├── test_flatten_observation.py │ ├── test_frame_stack.py │ ├── rescale_action.py │ ├── test_gray_scale_observation.py │ ├── record_episode_statistics.py │ ├── test_transform_reward.py │ ├── test_atari_preprocessing.py │ ├── filter_observation.py │ └── test_filter_observation.py ├── version.py ├── tests │ └── test_core.py ├── utils │ ├── __init__.py │ ├── tests │ │ ├── test_seeding.py │ │ └── test_atexit.py │ ├── json_utils.py │ ├── colorize.py │ ├── ezpickle.py │ ├── atomic_write.py │ └── closer.py ├── __init__.py └── logger.py ├── examples ├── scripts │ ├── list_envs │ └── sim_env └── agents │ ├── _policies.py │ ├── random_agent.py │ └── keyboard_agent.py ├── docs ├── misc.md ├── wrappers.md ├── readme.md ├── creating-environments.md └── agents.md ├── .gitignore ├── bin ├── docker_entrypoint └── render.py ├── .travis.yml ├── CODE_OF_CONDUCT.rst ├── py.Dockerfile ├── LICENSE.md ├── .github └── stale.yml └── setup.py /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .tox 2 | -------------------------------------------------------------------------------- /tests/gym/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/envs/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/spaces/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/vector/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/gym/envs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/envs/tests/rollout.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /gym/wrappers/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/gym/wrappers/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/.get: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/envs/robotics/hand/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/version.py: -------------------------------------------------------------------------------- 1 | VERSION = '0.15.4' 2 | -------------------------------------------------------------------------------- /gym/wrappers/monitoring/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/gym/envs/robotics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/wrappers/monitoring/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/gym/envs/robotics/hand/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/envs/atari/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.atari.atari_env import AtariEnv 2 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/F1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/F1.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/F2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/F2.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/F3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/F3.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/TH1_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/TH1_z.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/TH2_z.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/TH2_z.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/TH3_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/TH3_z.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/palm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/palm.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/wrist.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/wrist.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/textures/block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/textures/block.png -------------------------------------------------------------------------------- /gym/envs/classic_control/assets/clockwise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/classic_control/assets/clockwise.png -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/knuckle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/knuckle.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/estop_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/estop_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/gripper_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/gripper_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/laser_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/laser_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/textures/block_hidden.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/textures/block_hidden.png -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/forearm_electric.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/forearm_electric.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/base_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl 
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl -------------------------------------------------------------------------------- /examples/scripts/list_envs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from gym import envs 3 | envids = [spec.id for spec in envs.registry.all()] 4 | for envid in sorted(envids): 5 | print(envid) 6 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chozabu/gym/master/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl -------------------------------------------------------------------------------- /gym/wrappers/monitoring/tests/helpers.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import shutil 3 | import tempfile 4 | 5 | @contextlib.contextmanager 6 | def tempdir(): 7 | temp = tempfile.mkdtemp() 8 | yield temp 9 | shutil.rmtree(temp) 10 | -------------------------------------------------------------------------------- /gym/envs/unittest/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.unittest.cube_crash import CubeCrash 2 | from gym.envs.unittest.cube_crash import CubeCrashSparse 3 | from gym.envs.unittest.cube_crash 
import CubeCrashScreenBecomesBlack 4 | from gym.envs.unittest.memorize_digits import MemorizeDigits 5 | 6 | -------------------------------------------------------------------------------- /docs/misc.md: -------------------------------------------------------------------------------- 1 | # Miscellaneous 2 | 3 | Here we have a bunch of tools, libs, apis, tutorials, resources, etc. provided by the community to add value to the gym ecosystem. 4 | 5 | ## OpenAIGym.jl 6 | 7 | Convenience wrapper of the OpenAI Gym for the Julia language [/tbreloff/OpenAIGym.jl](https://github.com/tbreloff/OpenAIGym.jl) -------------------------------------------------------------------------------- /gym/envs/algorithmic/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.algorithmic.copy_ import CopyEnv 2 | from gym.envs.algorithmic.repeat_copy import RepeatCopyEnv 3 | from gym.envs.algorithmic.duplicated_input import DuplicatedInputEnv 4 | from gym.envs.algorithmic.reverse import ReverseEnv 5 | from gym.envs.algorithmic.reversed_addition import ReversedAdditionEnv 6 | -------------------------------------------------------------------------------- /gym/envs/box2d/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | import Box2D 3 | from gym.envs.box2d.lunar_lander import LunarLander 4 | from gym.envs.box2d.lunar_lander import LunarLanderContinuous 5 | from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore 6 | from gym.envs.box2d.car_racing import CarRacing 7 | except ImportError: 8 | Box2D = None 9 | -------------------------------------------------------------------------------- /gym/envs/classic_control/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.classic_control.cartpole import CartPoleEnv 2 | from gym.envs.classic_control.mountain_car import MountainCarEnv 3 | from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv 4 | from gym.envs.classic_control.pendulum import PendulumEnv 5 | from gym.envs.classic_control.acrobot import AcrobotEnv 6 | 7 | -------------------------------------------------------------------------------- /gym/tests/test_core.py: -------------------------------------------------------------------------------- 1 | from gym import core 2 | 3 | class ArgumentEnv(core.Env): 4 | calls = 0 5 | 6 | def __init__(self, arg): 7 | self.calls += 1 8 | self.arg = arg 9 | 10 | def test_env_instantiation(): 11 | # This looks like a pretty trivial, but given our usage of 12 | # __new__, it's worth having. 13 | env = ArgumentEnv('arg') 14 | assert env.arg == 'arg' 15 | assert env.calls == 1 16 | -------------------------------------------------------------------------------- /gym/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """A set of common utilities used within the environments. These are 2 | not intended as API functions, and will not remain stable over time. 3 | """ 4 | 5 | # These submodules should not have any import-time dependencies. 6 | # We want this since we use `utils` during our import-time sanity checks 7 | # that verify that our dependencies are actually present. 
8 | from .colorize import colorize 9 | from .ezpickle import EzPickle 10 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/copy_.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to copy content from the input tape to 3 | the output tape. http://arxiv.org/abs/1511.07275 4 | """ 5 | from gym.envs.algorithmic import algorithmic_env 6 | 7 | 8 | class CopyEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | def __init__(self, base=5, chars=True): 10 | super(CopyEnv, self).__init__(base=base, chars=chars) 11 | 12 | def target_from_input_data(self, input_data): 13 | return input_data 14 | -------------------------------------------------------------------------------- /gym/__init__.py: -------------------------------------------------------------------------------- 1 | import distutils.version 2 | import os 3 | import sys 4 | import warnings 5 | 6 | from gym import error 7 | from gym.version import VERSION as __version__ 8 | 9 | from gym.core import Env, GoalEnv, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper 10 | from gym.spaces import Space 11 | from gym.envs import make, spec, register 12 | from gym import logger 13 | from gym import vector 14 | 15 | __all__ = ["Env", "Space", "Wrapper", "make", "spec", "register"] 16 | -------------------------------------------------------------------------------- /gym/wrappers/clip_action.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym import ActionWrapper 4 | from gym.spaces import Box 5 | 6 | 7 | class ClipAction(ActionWrapper): 8 | r"""Clip the continuous action within the valid bound. """ 9 | def __init__(self, env): 10 | assert isinstance(env.action_space, Box) 11 | super(ClipAction, self).__init__(env) 12 | 13 | def action(self, action): 14 | return np.clip(action, self.action_space.low, self.action_space.high) 15 | -------------------------------------------------------------------------------- /gym/utils/tests/test_seeding.py: -------------------------------------------------------------------------------- 1 | from gym import error 2 | from gym.utils import seeding 3 | 4 | def test_invalid_seeds(): 5 | for seed in [-1, 'test']: 6 | try: 7 | seeding.np_random(seed) 8 | except error.Error: 9 | pass 10 | else: 11 | assert False, 'Invalid seed {} passed validation'.format(seed) 12 | 13 | def test_valid_seeds(): 14 | for seed in [0, 1]: 15 | random, seed1 = seeding.np_random(seed) 16 | assert seed == seed1 17 | -------------------------------------------------------------------------------- /gym/envs/README.md: -------------------------------------------------------------------------------- 1 | # Envs 2 | 3 | These are the core integrated environments. Note that we may later 4 | restructure any of the files, but will keep the environments available 5 | at the relevant package's top-level. So for example, you should access 6 | `AntEnv` as follows: 7 | 8 | ``` 9 | # Will be supported in future releases 10 | from gym.envs import mujoco 11 | mujoco.AntEnv 12 | ``` 13 | 14 | Rather than: 15 | 16 | ``` 17 | # May break in future releases 18 | from gym.envs.mujoco import ant 19 | ant.AntEnv 20 | ``` -------------------------------------------------------------------------------- /gym/envs/algorithmic/reverse.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to reverse content over the input tape. 
3 | http://arxiv.org/abs/1511.07275 4 | """ 5 | from gym.envs.algorithmic import algorithmic_env 6 | 7 | 8 | class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1 10 | 11 | def __init__(self, base=2): 12 | super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1) 13 | self.last = 50 14 | 15 | def target_from_input_data(self, input_str): 16 | return list(reversed(input_str)) 17 | -------------------------------------------------------------------------------- /gym/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.spaces.space import Space 2 | from gym.spaces.box import Box 3 | from gym.spaces.discrete import Discrete 4 | from gym.spaces.multi_discrete import MultiDiscrete 5 | from gym.spaces.multi_binary import MultiBinary 6 | from gym.spaces.tuple import Tuple 7 | from gym.spaces.dict import Dict 8 | 9 | from gym.spaces.utils import flatdim 10 | from gym.spaces.utils import flatten 11 | from gym.spaces.utils import unflatten 12 | 13 | __all__ = ["Space", "Box", "Discrete", "MultiDiscrete", "MultiBinary", "Tuple", "Dict", "flatdim", "flatten", "unflatten"] 14 | -------------------------------------------------------------------------------- /tests/gym/envs/robotics/hand/test_reach.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pytest 4 | 5 | from gym import envs 6 | from gym.envs.tests.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE 7 | 8 | 9 | @pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE) 10 | def test_serialize_deserialize(): 11 | env1 = envs.make('HandReach-v0', distance_threshold=1e-6) 12 | env1.reset() 13 | env2 = pickle.loads(pickle.dumps(env1)) 14 | 15 | assert env1.distance_threshold == env2.distance_threshold, ( 16 | env1.distance_threshold, env2.distance_threshold) 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | *.py~ 4 | .DS_Store 5 | .cache 6 | .pytest_cache/ 7 | 8 | # Setuptools distribution and build folders. 9 | /dist/ 10 | /build 11 | 12 | # Virtualenv 13 | /env 14 | 15 | # Python egg metadata, regenerated from source files by setuptools. 
16 | /*.egg-info 17 | 18 | *.sublime-project 19 | *.sublime-workspace 20 | 21 | logs/ 22 | 23 | .ipynb_checkpoints 24 | ghostdriver.log 25 | 26 | junk 27 | MUJOCO_LOG.txt 28 | 29 | rllab_mujoco 30 | 31 | tutorial/*.html 32 | 33 | # IDE files 34 | .eggs 35 | .tox 36 | 37 | # PyCharm project files 38 | .idea 39 | vizdoom.ini 40 | -------------------------------------------------------------------------------- /gym/envs/tests/test_kellycoinflip.py: -------------------------------------------------------------------------------- 1 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv 2 | 3 | 4 | class TestKellyCoinflipEnv: 5 | @staticmethod 6 | def test_done_when_reaches_max_wealth(): 7 | # https://github.com/openai/gym/issues/1266 8 | env = KellyCoinflipEnv() 9 | env.seed(1) 10 | env.reset() 11 | done = False 12 | 13 | while not done: 14 | action = int(env.wealth * 20) # bet 20% of the wealth 15 | observation, reward, done, info = env.step(action) 16 | 17 | assert env.wealth == env.max_wealth 18 | -------------------------------------------------------------------------------- /gym/utils/tests/test_atexit.py: -------------------------------------------------------------------------------- 1 | from gym.utils.closer import Closer 2 | 3 | class Closeable(object): 4 | close_called = False 5 | def close(self): 6 | self.close_called = True 7 | 8 | def test_register_unregister(): 9 | registry = Closer(atexit_register=False) 10 | c1 = Closeable() 11 | c2 = Closeable() 12 | 13 | assert not c1.close_called 14 | assert not c2.close_called 15 | registry.register(c1) 16 | id2 = registry.register(c2) 17 | 18 | registry.unregister(id2) 19 | registry.close() 20 | assert c1.close_called 21 | assert not c2.close_called 22 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/repeat_copy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to copy content multiple times from the input tape to 3 | the output tape. http://arxiv.org/abs/1511.07275 4 | """ 5 | from gym.envs.algorithmic import algorithmic_env 6 | 7 | 8 | class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1 10 | 11 | def __init__(self, base=5): 12 | super(RepeatCopyEnv, self).__init__(base=base, chars=True) 13 | self.last = 50 14 | 15 | def target_from_input_data(self, input_data): 16 | return input_data + list(reversed(input_data)) + input_data 17 | -------------------------------------------------------------------------------- /bin/docker_entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is the entrypoint for our Docker image. 3 | 4 | set -ex 5 | 6 | # Set up display; otherwise rendering will fail 7 | Xvfb -screen 0 1024x768x24 & 8 | export DISPLAY=:0 9 | 10 | # Wait for the file to come up 11 | display=0 12 | file="/tmp/.X11-unix/X$display" 13 | for i in $(seq 1 10); do 14 | if [ -e "$file" ]; then 15 | break 16 | fi 17 | 18 | echo "Waiting for $file to be created (try $i/10)" 19 | sleep "$i" 20 | done 21 | if ! 
[ -e "$file" ]; then 22 | echo "Timing out: $file was not created" 23 | exit 1 24 | fi 25 | 26 | exec "$@" 27 | -------------------------------------------------------------------------------- /bin/render.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import gym 4 | 5 | 6 | parser = argparse.ArgumentParser(description='Renders a Gym environment for quick inspection.') 7 | parser.add_argument('env_id', type=str, help='the ID of the environment to be rendered (e.g. HalfCheetah-v1') 8 | parser.add_argument('--step', type=int, default=1) 9 | args = parser.parse_args() 10 | 11 | env = gym.make(args.env_id) 12 | env.reset() 13 | 14 | step = 0 15 | while True: 16 | if args.step: 17 | env.step(env.action_space.sample()) 18 | env.render() 19 | if step % 10 == 0: 20 | env.reset() 21 | step += 1 22 | -------------------------------------------------------------------------------- /docs/wrappers.md: -------------------------------------------------------------------------------- 1 | # Wrappers 2 | 3 | ## Space Wrappers 4 | Wrappers that transform observation and/or action space. Contains 5 | * Discretize (make a discrete version of a continuous space) 6 | * Flatten (put all actions/observations into a single dimension) 7 | * Rescale (rescale the range of values for continuous spaces). 8 | 9 | Learn more here: https://github.com/ngc92/space-wrappers 10 | 11 | ## Utility wrappers for Atari Games 12 | The baseline repository contains wrappers that are used when doing Atari 13 | experiments. 14 | These can be found here: https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py 15 | -------------------------------------------------------------------------------- /gym/vector/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.vector.utils.misc import CloudpickleWrapper, clear_mpi_env_vars 2 | from gym.vector.utils.numpy_utils import concatenate, create_empty_array 3 | from gym.vector.utils.shared_memory import create_shared_memory, read_from_shared_memory, write_to_shared_memory 4 | from gym.vector.utils.spaces import _BaseGymSpaces, batch_space 5 | 6 | __all__ = [ 7 | 'CloudpickleWrapper', 8 | 'clear_mpi_env_vars', 9 | 'concatenate', 10 | 'create_empty_array', 11 | 'create_shared_memory', 12 | 'read_from_shared_memory', 13 | 'write_to_shared_memory', 14 | '_BaseGymSpaces', 15 | 'batch_space' 16 | ] 17 | -------------------------------------------------------------------------------- /gym/wrappers/flatten_observation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gym.spaces as spaces 3 | from gym import ObservationWrapper 4 | 5 | 6 | class FlattenObservation(ObservationWrapper): 7 | r"""Observation wrapper that flattens the observation.""" 8 | def __init__(self, env): 9 | super(FlattenObservation, self).__init__(env) 10 | 11 | flatdim = spaces.flatdim(env.observation_space) 12 | self.observation_space = spaces.Box(low=-float('inf'), high=float('inf'), shape=(flatdim,), dtype=np.float32) 13 | 14 | def observation(self, observation): 15 | return spaces.flatten(self.env.observation_space, observation) 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | python: 4 | - "3.7" 5 | services: 6 | - docker 
7 | env: 8 | # - UBUNTU_VER=14.04 - problems with atari-py 9 | - PY_VER=2.7 10 | - PY_VER=3.5.6 11 | - PY_VER=3.6.8 12 | - PY_VER=3.7.3 13 | 14 | install: "" # so travis doesn't do pip install requirements.txt 15 | script: 16 | - docker build -f py.Dockerfile --build-arg MUJOCO_KEY=$MUJOCO_KEY --build-arg PYTHON_VER=$PY_VER -t gym-test . 17 | - docker run gym-test 18 | 19 | deploy: 20 | provider: pypi 21 | username: $TWINE_USERNAME 22 | password: $TWINE_PASSWORD 23 | on: 24 | tags: true 25 | condition: $PY_VER = 3.5.6 26 | -------------------------------------------------------------------------------- /examples/agents/_policies.py: -------------------------------------------------------------------------------- 1 | # Support code for cem.py 2 | 3 | class BinaryActionLinearPolicy(object): 4 | def __init__(self, theta): 5 | self.w = theta[:-1] 6 | self.b = theta[-1] 7 | def act(self, ob): 8 | y = ob.dot(self.w) + self.b 9 | a = int(y < 0) 10 | return a 11 | 12 | class ContinuousActionLinearPolicy(object): 13 | def __init__(self, theta, n_in, n_out): 14 | assert len(theta) == (n_in + 1) * n_out 15 | self.W = theta[0 : n_in * n_out].reshape(n_in, n_out) 16 | self.b = theta[n_in * n_out : None].reshape(1, n_out) 17 | def act(self, ob): 18 | a = ob.dot(self.W) + self.b 19 | return a 20 | -------------------------------------------------------------------------------- /gym/utils/json_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def json_encode_np(obj): 4 | """ 5 | JSON can't serialize numpy types, so convert to pure python 6 | """ 7 | if isinstance(obj, np.ndarray): 8 | return list(obj) 9 | elif isinstance(obj, np.float32): 10 | return float(obj) 11 | elif isinstance(obj, np.float64): 12 | return float(obj) 13 | elif isinstance(obj, np.int8): 14 | return int(obj) 15 | elif isinstance(obj, np.int16): 16 | return int(obj) 17 | elif isinstance(obj, np.int32): 18 | return int(obj) 19 | elif isinstance(obj, np.int64): 20 | return int(obj) 21 | else: 22 | return obj 23 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | OpenAI Gym is dedicated to providing a harassment-free experience for 2 | everyone, regardless of gender, gender identity and expression, sexual 3 | orientation, disability, physical appearance, body size, age, race, or 4 | religion. We do not tolerate harassment of participants in any form. 5 | 6 | This code of conduct applies to all OpenAI Gym spaces (including Gist 7 | comments) both online and off. Anyone who violates this code of 8 | conduct may be sanctioned or expelled from these spaces at the 9 | discretion of the OpenAI team. 10 | 11 | We may add additional rules over time, which will be made clearly 12 | available to participants. Participants are responsible for knowing 13 | and abiding by these rules. 
14 | -------------------------------------------------------------------------------- /gym/envs/toy_text/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.toy_text.blackjack import BlackjackEnv 2 | from gym.envs.toy_text.roulette import RouletteEnv 3 | from gym.envs.toy_text.frozen_lake import FrozenLakeEnv 4 | from gym.envs.toy_text.nchain import NChainEnv 5 | from gym.envs.toy_text.hotter_colder import HotterColder 6 | from gym.envs.toy_text.guessing_game import GuessingGame 7 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv 8 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipGeneralizedEnv 9 | from gym.envs.toy_text.cliffwalking import CliffWalkingEnv 10 | from gym.envs.toy_text.taxi import TaxiEnv 11 | from gym.envs.toy_text.guessing_game import GuessingGame 12 | from gym.envs.toy_text.hotter_colder import HotterColder 13 | -------------------------------------------------------------------------------- /gym/envs/box2d/test_lunar_lander.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | try: 3 | import Box2D 4 | from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander 5 | except ImportError: 6 | Box2D = None 7 | 8 | 9 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed') 10 | def test_lunar_lander(): 11 | _test_lander(LunarLander(), seed=0) 12 | 13 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed') 14 | def test_lunar_lander_continuous(): 15 | _test_lander(LunarLanderContinuous(), seed=0) 16 | 17 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed') 18 | def _test_lander(env, seed=None, render=False): 19 | total_reward = demo_heuristic_lander(env, seed=seed, render=render) 20 | assert total_reward > 100 21 | 22 | 23 | -------------------------------------------------------------------------------- /docs/readme.md: -------------------------------------------------------------------------------- 1 | # Table of Contents 2 | 3 | - [Environments](environments.md) lists Gym environments to run your algorithms against. 4 | 5 | - [Creating your own Environments](creating-environments.md) how to create your own Gym environments. 6 | 7 | - [Wrappers](wrappers.md) list of general purpose wrappers for environments. These can perform pre/postprocessing on the data that is exchanged between the agent and the environment. 8 | 9 | - [Agents](agents.md) contains a listing of agents compatible with Gym environments. Agents facilitate the running of an algorithm against an environment. 10 | 11 | - [Miscellaneous](misc.md) is a collection of other value-add tools and utilities. These could be anything from a small convenience lib to a collection of video tutorials or a new language binding. 
12 | -------------------------------------------------------------------------------- /tests/gym/envs/robotics/hand/test_manipulate.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import unittest 3 | 4 | import pytest 5 | 6 | from gym import envs 7 | from gym.envs.tests.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE 8 | 9 | 10 | ENVIRONMENT_IDS = ( 11 | 'HandManipulateEgg-v0', 12 | 'HandManipulatePen-v0', 13 | 'HandManipulateBlock-v0', 14 | ) 15 | 16 | 17 | @pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE) 18 | @pytest.mark.parametrize("environment_id", ENVIRONMENT_IDS) 19 | def test_serialize_deserialize(environment_id): 20 | env1 = envs.make(environment_id, target_position='fixed') 21 | env1.reset() 22 | env2 = pickle.loads(pickle.dumps(env1)) 23 | 24 | assert env1.target_position == env2.target_position, ( 25 | env1.target_position, env2.target_position) 26 | -------------------------------------------------------------------------------- /gym/wrappers/test_clip_action.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | from gym.wrappers import ClipAction 5 | 6 | 7 | def test_clip_action(): 8 | # mountaincar: action-based rewards 9 | make_env = lambda: gym.make('MountainCarContinuous-v0') 10 | env = make_env() 11 | wrapped_env = ClipAction(make_env()) 12 | 13 | seed = 0 14 | env.seed(seed) 15 | wrapped_env.seed(seed) 16 | 17 | env.reset() 18 | wrapped_env.reset() 19 | 20 | actions = [[.4], [1.2], [-0.3], [0.0], [-2.5]] 21 | for action in actions: 22 | obs1, r1, d1, _ = env.step(np.clip(action, env.action_space.low, env.action_space.high)) 23 | obs2, r2, d2, _ = wrapped_env.step(action) 24 | assert np.allclose(r1, r2) 25 | assert np.allclose(obs1, obs2) 26 | assert d1 == d2 27 | -------------------------------------------------------------------------------- /gym/envs/robotics/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.robotics.fetch_env import FetchEnv 2 | from gym.envs.robotics.fetch.slide import FetchSlideEnv 3 | from gym.envs.robotics.fetch.pick_and_place import FetchPickAndPlaceEnv 4 | from gym.envs.robotics.fetch.push import FetchPushEnv 5 | from gym.envs.robotics.fetch.reach import FetchReachEnv 6 | 7 | from gym.envs.robotics.hand.reach import HandReachEnv 8 | from gym.envs.robotics.hand.manipulate import HandBlockEnv 9 | from gym.envs.robotics.hand.manipulate import HandEggEnv 10 | from gym.envs.robotics.hand.manipulate import HandPenEnv 11 | 12 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandBlockTouchSensorsEnv 13 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandEggTouchSensorsEnv 14 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandPenTouchSensorsEnv 15 | -------------------------------------------------------------------------------- /tests/gym/envs/robotics/hand/test_manipulate_touch_sensors.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pytest 4 | 5 | from gym import envs 6 | from gym.envs.tests.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE 7 | 8 | 9 | ENVIRONMENT_IDS = ( 10 | 'HandManipulateEggTouchSensors-v1', 11 | 'HandManipulatePenTouchSensors-v0', 12 | 'HandManipulateBlockTouchSensors-v0', 13 | ) 14 | 15 | 16 | @pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE) 17 | 
@pytest.mark.parametrize("environment_id", ENVIRONMENT_IDS) 18 | def test_serialize_deserialize(environment_id): 19 | env1 = envs.make(environment_id, target_position='fixed') 20 | env1.reset() 21 | env2 = pickle.loads(pickle.dumps(env1)) 22 | 23 | assert env1.target_position == env2.target_position, ( 24 | env1.target_position, env2.target_position) 25 | -------------------------------------------------------------------------------- /gym/wrappers/transform_reward.py: -------------------------------------------------------------------------------- 1 | from gym import RewardWrapper 2 | 3 | 4 | class TransformReward(RewardWrapper): 5 | r"""Transform the reward via an arbitrary function. 6 | 7 | Example:: 8 | 9 | >>> import gym 10 | >>> env = gym.make('CartPole-v1') 11 | >>> env = TransformReward(env, lambda r: 0.01*r) 12 | >>> env.reset() 13 | >>> observation, reward, done, info = env.step(env.action_space.sample()) 14 | >>> reward 15 | 0.01 16 | 17 | Args: 18 | env (Env): environment 19 | f (callable): a function that transforms the reward 20 | 21 | """ 22 | def __init__(self, env, f): 23 | super(TransformReward, self).__init__(env) 24 | assert callable(f) 25 | self.f = f 26 | 27 | def reward(self, reward): 28 | return self.f(reward) 29 | -------------------------------------------------------------------------------- /gym/logger.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from gym.utils import colorize 4 | 5 | DEBUG = 10 6 | INFO = 20 7 | WARN = 30 8 | ERROR = 40 9 | DISABLED = 50 10 | 11 | MIN_LEVEL = 30 12 | 13 | def set_level(level): 14 | """ 15 | Set logging threshold on current logger. 16 | """ 17 | global MIN_LEVEL 18 | MIN_LEVEL = level 19 | 20 | def debug(msg, *args): 21 | if MIN_LEVEL <= DEBUG: 22 | print('%s: %s'%('DEBUG', msg % args)) 23 | 24 | def info(msg, *args): 25 | if MIN_LEVEL <= INFO: 26 | print('%s: %s'%('INFO', msg % args)) 27 | 28 | def warn(msg, *args): 29 | if MIN_LEVEL <= WARN: 30 | warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow')) 31 | 32 | def error(msg, *args): 33 | if MIN_LEVEL <= ERROR: 34 | print(colorize('%s: %s'%('ERROR', msg % args), 'red')) 35 | 36 | # DEPRECATED: 37 | setLevel = set_level 38 | -------------------------------------------------------------------------------- /gym/wrappers/transform_observation.py: -------------------------------------------------------------------------------- 1 | from gym import ObservationWrapper 2 | 3 | 4 | class TransformObservation(ObservationWrapper): 5 | r"""Transform the observation via an arbitrary function. 
6 | 7 | Example:: 8 | 9 | >>> import gym 10 | >>> env = gym.make('CartPole-v1') 11 | >>> env = TransformObservation(env, lambda obs: obs + 0.1*np.random.randn(*obs.shape)) 12 | >>> env.reset() 13 | array([-0.08319338, 0.04635121, -0.07394746, 0.20877492]) 14 | 15 | Args: 16 | env (Env): environment 17 | f (callable): a function that transforms the observation 18 | 19 | """ 20 | def __init__(self, env, f): 21 | super(TransformObservation, self).__init__(env) 22 | assert callable(f) 23 | self.f = f 24 | 25 | def observation(self, observation): 26 | return self.f(observation) 27 | -------------------------------------------------------------------------------- /gym/envs/mujoco/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco.mujoco_env import MujocoEnv 2 | # ^^^^^ so that user gets the correct error 3 | # message if mujoco is not installed correctly 4 | from gym.envs.mujoco.ant import AntEnv 5 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv 6 | from gym.envs.mujoco.hopper import HopperEnv 7 | from gym.envs.mujoco.walker2d import Walker2dEnv 8 | from gym.envs.mujoco.humanoid import HumanoidEnv 9 | from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv 10 | from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv 11 | from gym.envs.mujoco.reacher import ReacherEnv 12 | from gym.envs.mujoco.swimmer import SwimmerEnv 13 | from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv 14 | from gym.envs.mujoco.pusher import PusherEnv 15 | from gym.envs.mujoco.thrower import ThrowerEnv 16 | from gym.envs.mujoco.striker import StrikerEnv 17 | -------------------------------------------------------------------------------- /gym/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from gym import error 2 | from gym.wrappers.monitor import Monitor 3 | from gym.wrappers.time_limit import TimeLimit 4 | from gym.wrappers.filter_observation import FilterObservation 5 | from gym.wrappers.atari_preprocessing import AtariPreprocessing 6 | from gym.wrappers.rescale_action import RescaleAction 7 | from gym.wrappers.flatten_observation import FlattenObservation 8 | from gym.wrappers.gray_scale_observation import GrayScaleObservation 9 | from gym.wrappers.frame_stack import LazyFrames 10 | from gym.wrappers.frame_stack import FrameStack 11 | from gym.wrappers.transform_observation import TransformObservation 12 | from gym.wrappers.transform_reward import TransformReward 13 | from gym.wrappers.resize_observation import ResizeObservation 14 | from gym.wrappers.clip_action import ClipAction 15 | from gym.wrappers.record_episode_statistics import RecordEpisodeStatistics 16 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/reach.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'reach.xml') 8 | 9 | 10 | class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.4049, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | } 17 | fetch_env.FetchEnv.__init__( 18 | self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20, 19 | gripper_extra_height=0.2, 
target_in_the_air=True, target_offset=0.0, 20 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 21 | initial_qpos=initial_qpos, reward_type=reward_type) 22 | utils.EzPickle.__init__(self) 23 | -------------------------------------------------------------------------------- /gym/wrappers/test_rescale_action.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import RescaleAction 7 | 8 | 9 | def test_rescale_action(): 10 | env = gym.make('CartPole-v1') 11 | with pytest.raises(AssertionError): 12 | env = RescaleAction(env, -1, 1) 13 | del env 14 | 15 | env = gym.make('Pendulum-v0') 16 | wrapped_env = RescaleAction(gym.make('Pendulum-v0'), -1, 1) 17 | 18 | seed = 0 19 | env.seed(seed) 20 | wrapped_env.seed(seed) 21 | 22 | obs = env.reset() 23 | wrapped_obs = wrapped_env.reset() 24 | assert np.allclose(obs, wrapped_obs) 25 | 26 | obs, reward, _, _ = env.step([1.5]) 27 | with pytest.raises(AssertionError): 28 | wrapped_env.step([1.5]) 29 | wrapped_obs, wrapped_reward, _, _ = wrapped_env.step([0.75]) 30 | 31 | assert np.allclose(obs, wrapped_obs) 32 | assert np.allclose(reward, wrapped_reward) 33 | -------------------------------------------------------------------------------- /py.Dockerfile: -------------------------------------------------------------------------------- 1 | # A Dockerfile that sets up a full Gym install with test dependencies 2 | ARG PYTHON_VER 3 | FROM python:$PYTHON_VER 4 | RUN apt-get -y update && apt-get install -y unzip libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg 5 | RUN \ 6 | # Download mujoco 7 | mkdir /root/.mujoco && \ 8 | cd /root/.mujoco && \ 9 | curl -O https://www.roboti.us/download/mjpro150_linux.zip && \ 10 | unzip mjpro150_linux.zip 11 | 12 | ARG MUJOCO_KEY 13 | ARG PYTHON_VER 14 | ENV MUJOCO_KEY=$MUJOCO_KEY 15 | 16 | ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mjpro150/bin 17 | RUN echo $MUJOCO_KEY | base64 --decode > /root/.mujoco/mjkey.txt 18 | RUN pip install pytest pytest-forked lz4 19 | 20 | COPY . /usr/local/gym/ 21 | WORKDIR /usr/local/gym/ 22 | RUN [ "$PYTHON_VER" != "2.7" ] && pip install .[all] || pip install . 
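# The chained RUN above installs gym with every optional extra (pip install .[all]) on Python 3 images,
# and falls back to a plain "pip install ." when PYTHON_VER is 2.7.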
23 | 24 | ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"] 25 | CMD ["pytest","--forked"] 26 | -------------------------------------------------------------------------------- /gym/spaces/multi_binary.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .space import Space 3 | 4 | 5 | class MultiBinary(Space): 6 | def __init__(self, n): 7 | self.n = n 8 | super(MultiBinary, self).__init__((self.n,), np.int8) 9 | 10 | def sample(self): 11 | return self.np_random.randint(low=0, high=2, size=self.n, dtype=self.dtype) 12 | 13 | def contains(self, x): 14 | if isinstance(x, list): 15 | x = np.array(x) # Promote list to array for contains check 16 | return ((x==0) | (x==1)).all() 17 | 18 | def to_jsonable(self, sample_n): 19 | return np.array(sample_n).tolist() 20 | 21 | def from_jsonable(self, sample_n): 22 | return [np.asarray(sample) for sample in sample_n] 23 | 24 | def __repr__(self): 25 | return "MultiBinary({})".format(self.n) 26 | 27 | def __eq__(self, other): 28 | return isinstance(other, MultiBinary) and self.n == other.n 29 | -------------------------------------------------------------------------------- /gym/wrappers/test_record_episode_statistics.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import gym 4 | from gym.wrappers import RecordEpisodeStatistics 5 | 6 | 7 | @pytest.mark.parametrize('env_id', ['CartPole-v0', 'Pendulum-v0']) 8 | @pytest.mark.parametrize('deque_size', [2, 5]) 9 | def test_record_episode_statistics(env_id, deque_size): 10 | env = gym.make(env_id) 11 | env = RecordEpisodeStatistics(env, deque_size) 12 | 13 | for n in range(5): 14 | env.reset() 15 | assert env.episode_return == 0.0 16 | assert env.episode_length == 0 17 | for t in range(env.spec.max_episode_steps): 18 | _, _, done, info = env.step(env.action_space.sample()) 19 | if done: 20 | assert 'episode' in info 21 | assert all([item in info['episode'] for item in ['r', 'l', 't']]) 22 | break 23 | assert len(env.return_queue) == deque_size 24 | assert len(env.length_queue) == deque_size 25 | -------------------------------------------------------------------------------- /gym/wrappers/test_resize_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import gym 4 | from gym.wrappers import ResizeObservation 5 | try: 6 | import atari_py 7 | except ImportError: 8 | atari_py = None 9 | 10 | 11 | @pytest.mark.skipif(atari_py is None, reason='Only run this test when atari_py is installed') 12 | @pytest.mark.parametrize('env_id', ['PongNoFrameskip-v0', 'SpaceInvadersNoFrameskip-v0']) 13 | @pytest.mark.parametrize('shape', [16, 32, (8, 5), [10, 7]]) 14 | def test_resize_observation(env_id, shape): 15 | env = gym.make(env_id) 16 | env = ResizeObservation(env, shape) 17 | 18 | 19 | assert env.observation_space.shape[-1] == 3 20 | obs = env.reset() 21 | if isinstance(shape, int): 22 | assert env.observation_space.shape[:2] == (shape, shape) 23 | assert obs.shape == (shape, shape, 3) 24 | else: 25 | assert env.observation_space.shape[:2] == tuple(shape) 26 | assert obs.shape == tuple(shape) + (3,) 27 | -------------------------------------------------------------------------------- /gym/wrappers/test_transform_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import TransformObservation 7 | 8 
| 9 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0']) 10 | def test_transform_observation(env_id): 11 | affine_transform = lambda x: 3*x + 2 12 | env = gym.make(env_id) 13 | wrapped_env = TransformObservation(gym.make(env_id), lambda obs: affine_transform(obs)) 14 | 15 | env.seed(0) 16 | wrapped_env.seed(0) 17 | 18 | obs = env.reset() 19 | wrapped_obs = wrapped_env.reset() 20 | assert np.allclose(wrapped_obs, affine_transform(obs)) 21 | 22 | action = env.action_space.sample() 23 | obs, reward, done, _ = env.step(action) 24 | wrapped_obs, wrapped_reward, wrapped_done, _ = wrapped_env.step(action) 25 | assert np.allclose(wrapped_obs, affine_transform(obs)) 26 | assert np.allclose(wrapped_reward, reward) 27 | assert wrapped_done == done 28 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/push.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'push.xml') 8 | 9 | 10 | class FetchPushEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | fetch_env.FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, 20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0, 21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/wrappers/resize_observation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym.spaces import Box 4 | from gym import ObservationWrapper 5 | 6 | 7 | class ResizeObservation(ObservationWrapper): 8 | r"""Downsample the image observation to a square image. """ 9 | def __init__(self, env, shape): 10 | super(ResizeObservation, self).__init__(env) 11 | if isinstance(shape, int): 12 | shape = (shape, shape) 13 | assert all(x > 0 for x in shape), shape 14 | self.shape = tuple(shape) 15 | 16 | obs_shape = self.shape + self.observation_space.shape[2:] 17 | self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8) 18 | 19 | def observation(self, observation): 20 | import cv2 21 | observation = cv2.resize(observation, self.shape[::-1], interpolation=cv2.INTER_AREA) 22 | if observation.ndim == 2: 23 | observation = np.expand_dims(observation, -1) 24 | return observation 25 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/duplicated_input.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to return every nth character from the input tape. 
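For example, with duplication=2 an input tape of [1, 1, 4, 4, 0, 0] should be mapped to the target [1, 4, 0].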
3 | http://arxiv.org/abs/1511.07275 4 | """ 5 | from __future__ import division 6 | from gym.envs.algorithmic import algorithmic_env 7 | 8 | 9 | class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv): 10 | def __init__(self, duplication=2, base=5): 11 | self.duplication = duplication 12 | super(DuplicatedInputEnv, self).__init__(base=base, chars=True) 13 | 14 | def generate_input_data(self, size): 15 | res = [] 16 | if size < self.duplication: 17 | size = self.duplication 18 | for i in range(size//self.duplication): 19 | char = self.np_random.randint(self.base) 20 | for _ in range(self.duplication): 21 | res.append(char) 22 | return res 23 | 24 | def target_from_input_data(self, input_data): 25 | return [input_data[i] for i in range(0, len(input_data), self.duplication)] 26 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/pick_and_place.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'pick_and_place.xml') 8 | 9 | 10 | class FetchPickAndPlaceEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | fetch_env.FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20, 20 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0, 21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/spaces/discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .space import Space 3 | 4 | 5 | class Discrete(Space): 6 | r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`. 
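A sample is a single integer drawn uniformly from this range; ``contains`` accepts plain Python ints as well as scalar integer ndarrays.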
7 | 8 | Example:: 9 | 10 | >>> Discrete(2) 11 | 12 | """ 13 | def __init__(self, n): 14 | assert n >= 0 15 | self.n = n 16 | super(Discrete, self).__init__((), np.int64) 17 | 18 | def sample(self): 19 | return self.np_random.randint(self.n) 20 | 21 | def contains(self, x): 22 | if isinstance(x, int): 23 | as_int = x 24 | elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.kind in np.typecodes['AllInteger'] and x.shape == ()): 25 | as_int = int(x) 26 | else: 27 | return False 28 | return as_int >= 0 and as_int < self.n 29 | 30 | def __repr__(self): 31 | return "Discrete(%d)" % self.n 32 | 33 | def __eq__(self, other): 34 | return isinstance(other, Discrete) and self.n == other.n 35 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/slide.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from gym import utils 5 | from gym.envs.robotics import fetch_env 6 | 7 | 8 | # Ensure we get the path separator correct on windows 9 | MODEL_XML_PATH = os.path.join('fetch', 'slide.xml') 10 | 11 | 12 | class FetchSlideEnv(fetch_env.FetchEnv, utils.EzPickle): 13 | def __init__(self, reward_type='sparse'): 14 | initial_qpos = { 15 | 'robot0:slide0': 0.05, 16 | 'robot0:slide1': 0.48, 17 | 'robot0:slide2': 0.0, 18 | 'object0:joint': [1.7, 1.1, 0.41, 1., 0., 0., 0.], 19 | } 20 | fetch_env.FetchEnv.__init__( 21 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, 22 | gripper_extra_height=-0.02, target_in_the_air=False, target_offset=np.array([0.4, 0.0, 0.0]), 23 | obj_range=0.1, target_range=0.3, distance_threshold=0.05, 24 | initial_qpos=initial_qpos, reward_type=reward_type) 25 | utils.EzPickle.__init__(self) 26 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/reach.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /gym/wrappers/time_limit.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | 4 | class TimeLimit(gym.Wrapper): 5 | def __init__(self, env, max_episode_steps=None): 6 | super(TimeLimit, self).__init__(env) 7 | if max_episode_steps is None and self.env.spec is not None: 8 | max_episode_steps = env.spec.max_episode_steps 9 | if self.env.spec is not None: 10 | self.env.spec.max_episode_steps = max_episode_steps 11 | self._max_episode_steps = max_episode_steps 12 | self._elapsed_steps = None 13 | 14 | def step(self, action): 15 | assert self._elapsed_steps is not None, "Cannot call env.step() before calling reset()" 16 | observation, reward, done, info = self.env.step(action) 17 | self._elapsed_steps += 1 18 | if self._elapsed_steps >= self._max_episode_steps: 19 | info['TimeLimit.truncated'] = not done 20 | done = True 21 | return observation, reward, done, info 22 | 23 | def reset(self, **kwargs): 24 | self._elapsed_steps = 0 25 | return self.env.reset(**kwargs) 26 | -------------------------------------------------------------------------------- /gym/wrappers/gray_scale_observation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym.spaces import Box 4 | from gym import ObservationWrapper 5 | 6 | 7 | class 
GrayScaleObservation(ObservationWrapper): 8 | r"""Convert the image observation from RGB to gray scale. """ 9 | def __init__(self, env, keep_dim=False): 10 | super(GrayScaleObservation, self).__init__(env) 11 | self.keep_dim = keep_dim 12 | 13 | assert len(env.observation_space.shape) == 3 and env.observation_space.shape[-1] == 3 14 | obs_shape = self.observation_space.shape[:2] 15 | if self.keep_dim: 16 | self.observation_space = Box(low=0, high=255, shape=(obs_shape[0], obs_shape[1], 1), dtype=np.uint8) 17 | else: 18 | self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8) 19 | 20 | def observation(self, observation): 21 | import cv2 22 | observation = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY) 23 | if self.keep_dim: 24 | observation = np.expand_dims(observation, -1) 25 | return observation 26 | -------------------------------------------------------------------------------- /gym/utils/colorize.py: -------------------------------------------------------------------------------- 1 | """A set of common utilities used within the environments. These are 2 | not intended as API functions, and will not remain stable over time. 3 | """ 4 | 5 | color2num = dict( 6 | gray=30, 7 | red=31, 8 | green=32, 9 | yellow=33, 10 | blue=34, 11 | magenta=35, 12 | cyan=36, 13 | white=37, 14 | crimson=38 15 | ) 16 | 17 | 18 | def colorize(string, color, bold=False, highlight = False): 19 | """Return string surrounded by appropriate terminal color codes to 20 | print colorized text. Valid colors: gray, red, green, yellow, 21 | blue, magenta, cyan, white, crimson 22 | """ 23 | 24 | # Import six here so that `utils` has no import-time dependencies. 25 | # We want this since we use `utils` during our import-time sanity checks 26 | # that verify that our dependencies (including six) are actually present. 27 | import six 28 | 29 | attr = [] 30 | num = color2num[color] 31 | if highlight: num += 10 32 | attr.append(six.u(str(num))) 33 | if bold: attr.append(six.u('1')) 34 | attrs = six.u(';').join(attr) 35 | return six.u('\x1b[%sm%s\x1b[0m') % (attrs, string) 36 | -------------------------------------------------------------------------------- /gym/utils/ezpickle.py: -------------------------------------------------------------------------------- 1 | class EzPickle(object): 2 | """Objects that are pickled and unpickled via their constructor 3 | arguments. 4 | 5 | Example usage: 6 | 7 | class Dog(Animal, EzPickle): 8 | def __init__(self, furcolor, tailkind="bushy"): 9 | Animal.__init__() 10 | EzPickle.__init__(furcolor, tailkind) 11 | ... 12 | 13 | When this object is unpickled, a new Dog will be constructed by passing the provided 14 | furcolor and tailkind into the constructor. However, philosophers are still not sure 15 | whether it is still the same dog. 16 | 17 | This is generally needed only for environments which wrap C/C++ code, such as MuJoCo 18 | and Atari. 
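    Under the hood, the constructor arguments are simply stored on the instance and
    replayed via ``__getstate__``/``__setstate__``, so any state built by the
    constructor is recreated from scratch when unpickling.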
19 | """ 20 | def __init__(self, *args, **kwargs): 21 | self._ezpickle_args = args 22 | self._ezpickle_kwargs = kwargs 23 | def __getstate__(self): 24 | return {"_ezpickle_args" : self._ezpickle_args, "_ezpickle_kwargs": self._ezpickle_kwargs} 25 | def __setstate__(self, d): 26 | out = type(self)(*d["_ezpickle_args"], **d["_ezpickle_kwargs"]) 27 | self.__dict__.update(out.__dict__) 28 | -------------------------------------------------------------------------------- /gym/wrappers/README.md: -------------------------------------------------------------------------------- 1 | # Wrappers 2 | 3 | Wrappers are used to transform an environment in a modular way: 4 | 5 | ```python 6 | env = gym.make('Pong-v0') 7 | env = MyWrapper(env) 8 | ``` 9 | 10 | Note that we may later restructure any of the files in this directory, 11 | but will keep the wrappers available at the wrappers' top-level 12 | folder. So for example, you should access `MyWrapper` as follows: 13 | 14 | ```python 15 | from gym.wrappers import MyWrapper 16 | ``` 17 | 18 | ## Quick tips for writing your own wrapper 19 | 20 | - Don't forget to call `super(class_name, self).__init__(env)` if you override the wrapper's `__init__` function 21 | - You can access the inner environment with `self.unwrapped` 22 | - You can access the previous layer using `self.env` 23 | - The variables `metadata`, `action_space`, `observation_space`, `reward_range`, and `spec` are copied to `self` from the previous layer 24 | - Create a wrapped function for at least one of the following: `__init__(self, env)`, `step`, `reset`, `render`, `close`, or `seed` 25 | - Your layered function should take its input from the previous layer (`self.env`) and/or the inner layer (`self.unwrapped`) 26 | -------------------------------------------------------------------------------- /gym/wrappers/test_flatten_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import FlattenObservation 7 | from gym import spaces 8 | 9 | 10 | @pytest.mark.parametrize('env_id', ['Blackjack-v0', 'KellyCoinflip-v0']) 11 | def test_flatten_observation(env_id): 12 | env = gym.make(env_id) 13 | wrapped_env = FlattenObservation(env) 14 | 15 | obs = env.reset() 16 | wrapped_obs = wrapped_env.reset() 17 | 18 | if env_id == 'Blackjack-v0': 19 | space = spaces.Tuple(( 20 | spaces.Discrete(32), 21 | spaces.Discrete(11), 22 | spaces.Discrete(2))) 23 | wrapped_space = spaces.Box(-np.inf, np.inf, 24 | [32 + 11 + 2], dtype=np.float32) 25 | elif env_id == 'KellyCoinflip-v0': 26 | space = spaces.Tuple(( 27 | spaces.Box(0, 250.0, [1], dtype=np.float32), 28 | spaces.Discrete(300 + 1))) 29 | wrapped_space = spaces.Box(-np.inf, np.inf, 30 | [1 + (300 + 1)], dtype=np.float32) 31 | 32 | assert space.contains(obs) 33 | assert wrapped_space.contains(wrapped_obs) 34 | -------------------------------------------------------------------------------- /gym/envs/mujoco/inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | utils.EzPickle.__init__(self) 8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2) 9 | 10 | def step(self, a): 11 | reward = 1.0 12 | self.do_simulation(a, self.frame_skip) 13 | ob = self._get_obs() 14 | notdone = 
np.isfinite(ob).all() and (np.abs(ob[1]) <= .2) 15 | done = not notdone 16 | return ob, reward, done, {} 17 | 18 | def reset_model(self): 19 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01) 20 | qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01) 21 | self.set_state(qpos, qvel) 22 | return self._get_obs() 23 | 24 | def _get_obs(self): 25 | return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel() 26 | 27 | def viewer_setup(self): 28 | v = self.viewer 29 | v.cam.trackbodyid = 0 30 | v.cam.distance = self.model.stat.extent 31 | -------------------------------------------------------------------------------- /gym/envs/mujoco/swimmer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, a): 11 | ctrl_cost_coeff = 0.0001 12 | xposbefore = self.sim.data.qpos[0] 13 | self.do_simulation(a, self.frame_skip) 14 | xposafter = self.sim.data.qpos[0] 15 | reward_fwd = (xposafter - xposbefore) / self.dt 16 | reward_ctrl = - ctrl_cost_coeff * np.square(a).sum() 17 | reward = reward_fwd + reward_ctrl 18 | ob = self._get_obs() 19 | return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl) 20 | 21 | def _get_obs(self): 22 | qpos = self.sim.data.qpos 23 | qvel = self.sim.data.qvel 24 | return np.concatenate([qpos.flat[2:], qvel.flat]) 25 | 26 | def reset_model(self): 27 | self.set_state( 28 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq), 29 | self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv) 30 | ) 31 | return self._get_obs() 32 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/reversed_addition.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from gym.envs.algorithmic import algorithmic_env 3 | 4 | 5 | class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv): 6 | def __init__(self, rows=2, base=3): 7 | super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False) 8 | 9 | def target_from_input_data(self, input_strings): 10 | curry = 0 11 | target = [] 12 | for digits in input_strings: 13 | total = sum(digits) + curry 14 | target.append(total % self.base) 15 | curry = total // self.base 16 | 17 | if curry > 0: 18 | target.append(curry) 19 | return target 20 | 21 | @property 22 | def time_limit(self): 23 | # Quirk preserved for the sake of consistency: add the length of the input 24 | # rather than the length of the desired output (which may differ if there's 25 | # an extra carried digit). 26 | # TODO: It seems like this time limit is so strict as to make Addition3-v0 27 | # unsolvable, since agents aren't even given enough time steps to look at 28 | # all the digits. (The solutions on the scoreboard seem to only work by 29 | # save-scumming.) 
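        # The limit below works out to two time steps per input column plus a
        # small constant buffer.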
30 | return self.input_width*2 + 4 31 | -------------------------------------------------------------------------------- /gym/wrappers/test_frame_stack.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | pytest.importorskip("atari_py") 3 | 4 | import numpy as np 5 | import gym 6 | from gym.wrappers import FrameStack 7 | try: 8 | import lz4 9 | except ImportError: 10 | lz4 = None 11 | 12 | 13 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0', 'Pong-v0']) 14 | @pytest.mark.parametrize('num_stack', [2, 3, 4]) 15 | @pytest.mark.parametrize('lz4_compress', [ 16 | pytest.param(True, marks=pytest.mark.skipif(lz4 is None, reason="Need lz4 to run tests with compression")), 17 | False 18 | ]) 19 | def test_frame_stack(env_id, num_stack, lz4_compress): 20 | env = gym.make(env_id) 21 | shape = env.observation_space.shape 22 | env = FrameStack(env, num_stack, lz4_compress) 23 | assert env.observation_space.shape == (num_stack,) + shape 24 | 25 | obs = env.reset() 26 | obs = np.asarray(obs) 27 | assert obs.shape == (num_stack,) + shape 28 | for i in range(1, num_stack): 29 | assert np.allclose(obs[i - 1], obs[i]) 30 | 31 | obs, _, _, _ = env.step(env.action_space.sample()) 32 | obs = np.asarray(obs) 33 | assert obs.shape == (num_stack,) + shape 34 | for i in range(1, num_stack - 1): 35 | assert np.allclose(obs[i - 1], obs[i]) 36 | assert not np.allclose(obs[-1], obs[-2]) 37 | -------------------------------------------------------------------------------- /gym/vector/utils/misc.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import os 3 | 4 | __all__ = ['CloudpickleWrapper', 'clear_mpi_env_vars'] 5 | 6 | class CloudpickleWrapper(object): 7 | def __init__(self, fn): 8 | self.fn = fn 9 | 10 | def __getstate__(self): 11 | import cloudpickle 12 | return cloudpickle.dumps(self.fn) 13 | 14 | def __setstate__(self, ob): 15 | import pickle 16 | self.fn = pickle.loads(ob) 17 | 18 | def __call__(self): 19 | return self.fn() 20 | 21 | @contextlib.contextmanager 22 | def clear_mpi_env_vars(): 23 | """ 24 | `from mpi4py import MPI` will call `MPI_Init` by default. If the child 25 | process has MPI environment variables, MPI will think that the child process 26 | is an MPI process just like the parent and do bad things such as hang. 27 | 28 | This context manager is a hacky way to clear those environment variables 29 | temporarily such as when we are starting multiprocessing Processes. 
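    Typical usage (``fn`` standing in for whatever target the worker should run)::

        with clear_mpi_env_vars():
            worker = multiprocessing.Process(target=fn)
            worker.start()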
30 | """ 31 | removed_environment = {} 32 | for k, v in list(os.environ.items()): 33 | for prefix in ['OMPI_', 'PMI_']: 34 | if k.startswith(prefix): 35 | removed_environment[k] = v 36 | del os.environ[k] 37 | try: 38 | yield 39 | finally: 40 | os.environ.update(removed_environment) 41 | -------------------------------------------------------------------------------- /gym/envs/mujoco/half_cheetah.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, action): 11 | xposbefore = self.sim.data.qpos[0] 12 | self.do_simulation(action, self.frame_skip) 13 | xposafter = self.sim.data.qpos[0] 14 | ob = self._get_obs() 15 | reward_ctrl = - 0.1 * np.square(action).sum() 16 | reward_run = (xposafter - xposbefore)/self.dt 17 | reward = reward_ctrl + reward_run 18 | done = False 19 | return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl) 20 | 21 | def _get_obs(self): 22 | return np.concatenate([ 23 | self.sim.data.qpos.flat[1:], 24 | self.sim.data.qvel.flat, 25 | ]) 26 | 27 | def reset_model(self): 28 | qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq) 29 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1 30 | self.set_state(qpos, qvel) 31 | return self._get_obs() 32 | 33 | def viewer_setup(self): 34 | self.viewer.cam.distance = self.model.stat.extent * 0.5 35 | -------------------------------------------------------------------------------- /gym/wrappers/rescale_action.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | from gym import spaces 5 | 6 | 7 | class RescaleAction(gym.ActionWrapper): 8 | r"""Rescales the continuous action space of the environment to a range [a,b]. 9 | 10 | Example:: 11 | 12 | >>> RescaleAction(env, a, b).action_space == Box(a,b) 13 | True 14 | 15 | """ 16 | def __init__(self, env, a, b): 17 | assert isinstance(env.action_space, spaces.Box), ( 18 | "expected Box action space, got {}".format(type(env.action_space))) 19 | assert np.less_equal(a, b).all(), (a, b) 20 | super(RescaleAction, self).__init__(env) 21 | self.a = np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + a 22 | self.b = np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + b 23 | self.action_space = spaces.Box(low=a, high=b, shape=env.action_space.shape, dtype=env.action_space.dtype) 24 | 25 | def action(self, action): 26 | assert np.all(np.greater_equal(action, self.a)), (action, self.a) 27 | assert np.all(np.less_equal(action, self.b)), (action, self.b) 28 | low = self.env.action_space.low 29 | high = self.env.action_space.high 30 | action = low + (high - low)*((action - self.a)/(self.b - self.a)) 31 | action = np.clip(action, low, high) 32 | return action 33 | -------------------------------------------------------------------------------- /gym/envs/tests/test_frozenlake_dfs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from gym.envs.toy_text.frozen_lake import generate_random_map 5 | 6 | # Test that FrozenLake map generation creates valid maps of various sizes. 
7 | def test_frozenlake_dfs_map_generation(): 8 | 9 | def frozenlake_dfs_path_exists(res): 10 | frontier, discovered = [], set() 11 | frontier.append((0,0)) 12 | while frontier: 13 | r, c = frontier.pop() 14 | if not (r,c) in discovered: 15 | discovered.add((r,c)) 16 | directions = [(1, 0), (0, 1), (-1, 0), (0, -1)] 17 | for x, y in directions: 18 | r_new = r + x 19 | c_new = c + y 20 | if r_new < 0 or r_new >= size or c_new < 0 or c_new >= size: 21 | continue 22 | if res[r_new][c_new] == 'G': 23 | return True 24 | if (res[r_new][c_new] not in '#H'): 25 | frontier.append((r_new, c_new)) 26 | return False 27 | 28 | map_sizes = [5, 10, 200] 29 | for size in map_sizes: 30 | new_frozenlake = generate_random_map(size) 31 | assert len(new_frozenlake) == size 32 | assert len(new_frozenlake[0]) == size 33 | assert frozenlake_dfs_path_exists(new_frozenlake) 34 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/push.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /gym/wrappers/test_gray_scale_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import GrayScaleObservation 7 | from gym.wrappers import AtariPreprocessing 8 | pytest.importorskip('atari_py') 9 | pytest.importorskip('cv2') 10 | 11 | @pytest.mark.parametrize('env_id', ['PongNoFrameskip-v0', 'SpaceInvadersNoFrameskip-v0']) 12 | @pytest.mark.parametrize('keep_dim', [True, False]) 13 | def test_gray_scale_observation(env_id, keep_dim): 14 | gray_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=True) 15 | rgb_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=False) 16 | wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim) 17 | assert rgb_env.observation_space.shape[-1] == 3 18 | 19 | seed = 0 20 | gray_env.seed(seed) 21 | wrapped_env.seed(seed) 22 | 23 | gray_obs = gray_env.reset() 24 | wrapped_obs = wrapped_env.reset() 25 | 26 | if keep_dim: 27 | assert wrapped_env.observation_space.shape[-1] == 1 28 | assert len(wrapped_obs.shape) == 3 29 | wrapped_obs = wrapped_obs.squeeze(-1) 30 | else: 31 | assert len(wrapped_env.observation_space.shape) == 2 32 | assert len(wrapped_obs.shape) == 2 33 | 34 | # ALE gray scale is slightly different, but no more than by one shade 35 | assert np.allclose(gray_obs.astype('int32'), wrapped_obs.astype('int32'), atol=1) 36 | -------------------------------------------------------------------------------- /gym/wrappers/record_episode_statistics.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import deque 3 | 4 | import gym 5 | 6 | 7 | class RecordEpisodeStatistics(gym.Wrapper): 8 | def __init__(self, env, deque_size=100): 9 | super(RecordEpisodeStatistics, self).__init__(env) 10 | self.t0 = time.time() # TODO: use perf_counter when gym removes Python 2 support 11 | self.episode_return = 0.0 12 | self.episode_length = 0 13 | self.return_queue = deque(maxlen=deque_size) 14 | self.length_queue = deque(maxlen=deque_size) 15 | 16 | def reset(self, **kwargs): 17 | observation = super(RecordEpisodeStatistics, self).reset(**kwargs) 18 | 
self.episode_return = 0.0 19 | self.episode_length = 0 20 | return observation 21 | 22 | def step(self, action): 23 | observation, reward, done, info = super(RecordEpisodeStatistics, self).step(action) 24 | self.episode_return += reward 25 | self.episode_length += 1 26 | if done: 27 | info['episode'] = {'r': self.episode_return, 28 | 'l': self.episode_length, 29 | 't': round(time.time() - self.t0, 6)} 30 | self.return_queue.append(self.episode_return) 31 | self.length_queue.append(self.episode_length) 32 | self.episode_return = 0.0 33 | self.episode_length = 0 34 | return observation, reward, done, info 35 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/inverted_pendulum.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/slide.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /gym/envs/tests/spec_list.py: -------------------------------------------------------------------------------- 1 | from gym import envs, logger 2 | import os 3 | 4 | 5 | SKIP_MUJOCO_WARNING_MESSAGE = ( 6 | "Cannot run mujoco test (either license key not found or mujoco not" 7 | "installed properly).") 8 | 9 | 10 | skip_mujoco = not (os.environ.get('MUJOCO_KEY')) 11 | if not skip_mujoco: 12 | try: 13 | import mujoco_py 14 | except ImportError: 15 | skip_mujoco = True 16 | 17 | def should_skip_env_spec_for_tests(spec): 18 | # We skip tests for envs that require dependencies or are otherwise 19 | # troublesome to run frequently 20 | ep = spec.entry_point 21 | # Skip mujoco tests for pull request CI 22 | if skip_mujoco and (ep.startswith('gym.envs.mujoco') or ep.startswith('gym.envs.robotics:')): 23 | return True 24 | try: 25 | import atari_py 26 | except ImportError: 27 | if ep.startswith('gym.envs.atari'): 28 | return True 29 | try: 30 | import Box2D 31 | except ImportError: 32 | if ep.startswith('gym.envs.box2d'): 33 | return True 34 | 35 | if ( 'GoEnv' in ep or 36 | 'HexEnv' in ep or 37 | (ep.startswith("gym.envs.atari") and not spec.id.startswith("Pong") and not spec.id.startswith("Seaquest")) 38 | ): 39 | logger.warn("Skipping tests for env {}".format(ep)) 40 | return True 41 | return False 42 | 43 | spec_list = [spec for spec in sorted(envs.registry.all(), key=lambda x: x.id) if spec.entry_point is not None and not should_skip_env_spec_for_tests(spec)] 44 | -------------------------------------------------------------------------------- /gym/envs/mujoco/walker2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4) 9 | utils.EzPickle.__init__(self) 10 | 11 | def step(self, a): 12 | posbefore = self.sim.data.qpos[0] 13 | self.do_simulation(a, self.frame_skip) 14 | posafter, height, ang = self.sim.data.qpos[0:3] 15 | alive_bonus = 1.0 16 | reward = ((posafter - posbefore) / self.dt) 17 | reward += alive_bonus 18 | reward -= 1e-3 * np.square(a).sum() 
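        # Reward: forward progress per unit of time plus an alive bonus, minus a
        # small quadratic control cost; the episode ends once the torso height or
        # pitch angle leaves the healthy range checked below.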
19 | done = not (height > 0.8 and height < 2.0 and 20 | ang > -1.0 and ang < 1.0) 21 | ob = self._get_obs() 22 | return ob, reward, done, {} 23 | 24 | def _get_obs(self): 25 | qpos = self.sim.data.qpos 26 | qvel = self.sim.data.qvel 27 | return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel() 28 | 29 | def reset_model(self): 30 | self.set_state( 31 | self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq), 32 | self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) 33 | ) 34 | return self._get_obs() 35 | 36 | def viewer_setup(self): 37 | self.viewer.cam.trackbodyid = 2 38 | self.viewer.cam.distance = self.model.stat.extent * 0.5 39 | self.viewer.cam.lookat[2] = 1.15 40 | self.viewer.cam.elevation = -20 41 | -------------------------------------------------------------------------------- /gym/envs/toy_text/roulette.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | 6 | class RouletteEnv(gym.Env): 7 | """Simple roulette environment 8 | 9 | The roulette wheel has 37 spots. If the bet is 0 and a 0 comes up, 10 | you win a reward of 35. If the parity of your bet matches the parity 11 | of the spin, you win 1. Otherwise you receive a reward of -1. 12 | 13 | The long run reward for playing 0 should be -1/37 for any state 14 | 15 | The last action (38) stops the rollout for a return of 0 (walking away) 16 | """ 17 | def __init__(self, spots=37): 18 | self.n = spots + 1 19 | self.action_space = spaces.Discrete(self.n) 20 | self.observation_space = spaces.Discrete(1) 21 | self.seed() 22 | 23 | def seed(self, seed=None): 24 | self.np_random, seed = seeding.np_random(seed) 25 | return [seed] 26 | 27 | def step(self, action): 28 | assert self.action_space.contains(action) 29 | if action == self.n - 1: 30 | # observation, reward, done, info 31 | return 0, 0, True, {} 32 | 33 | # N.B. np.random.randint draws from [A, B) while random.randint draws from [A,B] 34 | val = self.np_random.randint(0, self.n - 1) 35 | if val == action == 0: 36 | reward = self.n - 2.0 37 | elif val != 0 and action != 0 and val % 2 == action % 2: 38 | reward = 1.0 39 | else: 40 | reward = -1.0 41 | return 0, reward, False, {} 42 | 43 | def reset(self): 44 | return 0 45 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # gym 2 | 3 | The MIT License 4 | 5 | Copyright (c) 2016 OpenAI (https://openai.com) 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | 25 | # Mujoco models 26 | This work is derived from [MuJuCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license: 27 | ``` 28 | This file is part of MuJoCo. 29 | Copyright 2009-2015 Roboti LLC. 30 | Mujoco :: Advanced physics simulation engine 31 | Source : www.roboti.us 32 | Version : 1.31 33 | Released : 23Apr16 34 | Author :: Vikash Kumar 35 | Contacts : kumar@roboti.us 36 | ``` 37 | -------------------------------------------------------------------------------- /gym/envs/mujoco/hopper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, a): 11 | posbefore = self.sim.data.qpos[0] 12 | self.do_simulation(a, self.frame_skip) 13 | posafter, height, ang = self.sim.data.qpos[0:3] 14 | alive_bonus = 1.0 15 | reward = (posafter - posbefore) / self.dt 16 | reward += alive_bonus 17 | reward -= 1e-3 * np.square(a).sum() 18 | s = self.state_vector() 19 | done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and 20 | (height > .7) and (abs(ang) < .2)) 21 | ob = self._get_obs() 22 | return ob, reward, done, {} 23 | 24 | def _get_obs(self): 25 | return np.concatenate([ 26 | self.sim.data.qpos.flat[1:], 27 | np.clip(self.sim.data.qvel.flat, -10, 10) 28 | ]) 29 | 30 | def reset_model(self): 31 | qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq) 32 | qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) 33 | self.set_state(qpos, qvel) 34 | return self._get_obs() 35 | 36 | def viewer_setup(self): 37 | self.viewer.cam.trackbodyid = 2 38 | self.viewer.cam.distance = self.model.stat.extent * 0.75 39 | self.viewer.cam.lookat[2] = 1.15 40 | self.viewer.cam.elevation = -20 41 | -------------------------------------------------------------------------------- /gym/spaces/space.py: -------------------------------------------------------------------------------- 1 | from gym.utils import seeding 2 | 3 | 4 | class Space(object): 5 | """Defines the observation and action spaces, so you can write generic 6 | code that applies to any Env. For example, you can choose a random 7 | action. 8 | """ 9 | def __init__(self, shape=None, dtype=None): 10 | import numpy as np # takes about 300-400ms to import, so we load lazily 11 | self.shape = None if shape is None else tuple(shape) 12 | self.dtype = None if dtype is None else np.dtype(dtype) 13 | self.np_random = None 14 | self.seed() 15 | 16 | def sample(self): 17 | """Randomly sample an element of this space. Can be 18 | uniform or non-uniform sampling based on boundedness of space.""" 19 | raise NotImplementedError 20 | 21 | def seed(self, seed=None): 22 | """Seed the PRNG of this space. 
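        Returns the list of seeds used; composite spaces such as Tuple override
        this to seed their subspaces as well.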
""" 23 | self.np_random, seed = seeding.np_random(seed) 24 | return [seed] 25 | 26 | def contains(self, x): 27 | """ 28 | Return boolean specifying if x is a valid 29 | member of this space 30 | """ 31 | raise NotImplementedError 32 | 33 | def __contains__(self, x): 34 | return self.contains(x) 35 | 36 | def to_jsonable(self, sample_n): 37 | """Convert a batch of samples from this space to a JSONable data type.""" 38 | # By default, assume identity is JSONable 39 | return sample_n 40 | 41 | def from_jsonable(self, sample_n): 42 | """Convert a JSONable data type to a batch of samples from this space.""" 43 | # By default, assume identity is JSONable 44 | return sample_n 45 | -------------------------------------------------------------------------------- /gym/envs/mujoco/inverted_double_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5) 9 | utils.EzPickle.__init__(self) 10 | 11 | def step(self, action): 12 | self.do_simulation(action, self.frame_skip) 13 | ob = self._get_obs() 14 | x, _, y = self.sim.data.site_xpos[0] 15 | dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2 16 | v1, v2 = self.sim.data.qvel[1:3] 17 | vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 18 | alive_bonus = 10 19 | r = alive_bonus - dist_penalty - vel_penalty 20 | done = bool(y <= 1) 21 | return ob, r, done, {} 22 | 23 | def _get_obs(self): 24 | return np.concatenate([ 25 | self.sim.data.qpos[:1], # cart x pos 26 | np.sin(self.sim.data.qpos[1:]), # link angles 27 | np.cos(self.sim.data.qpos[1:]), 28 | np.clip(self.sim.data.qvel, -10, 10), 29 | np.clip(self.sim.data.qfrc_constraint, -10, 10) 30 | ]).ravel() 31 | 32 | def reset_model(self): 33 | self.set_state( 34 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq), 35 | self.init_qvel + self.np_random.randn(self.model.nv) * .1 36 | ) 37 | return self._get_obs() 38 | 39 | def viewer_setup(self): 40 | v = self.viewer 41 | v.cam.trackbodyid = 0 42 | v.cam.distance = self.model.stat.extent * 0.5 43 | v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2] 44 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/pick_and_place.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /gym/envs/mujoco/reacher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | utils.EzPickle.__init__(self) 8 | mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2) 9 | 10 | def step(self, a): 11 | vec = self.get_body_com("fingertip")-self.get_body_com("target") 12 | reward_dist = - np.linalg.norm(vec) 13 | reward_ctrl = - np.square(a).sum() 14 | reward = reward_dist + reward_ctrl 15 | self.do_simulation(a, self.frame_skip) 16 | ob = self._get_obs() 17 | done = False 18 | return ob, reward, done, 
dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) 19 | 20 | def viewer_setup(self): 21 | self.viewer.cam.trackbodyid = 0 22 | 23 | def reset_model(self): 24 | qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos 25 | while True: 26 | self.goal = self.np_random.uniform(low=-.2, high=.2, size=2) 27 | if np.linalg.norm(self.goal) < 0.2: 28 | break 29 | qpos[-2:] = self.goal 30 | qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) 31 | qvel[-2:] = 0 32 | self.set_state(qpos, qvel) 33 | return self._get_obs() 34 | 35 | def _get_obs(self): 36 | theta = self.sim.data.qpos.flat[:2] 37 | return np.concatenate([ 38 | np.cos(theta), 39 | np.sin(theta), 40 | self.sim.data.qpos.flat[2:], 41 | self.sim.data.qvel.flat[:2], 42 | self.get_body_com("fingertip") - self.get_body_com("target") 43 | ]) 44 | -------------------------------------------------------------------------------- /gym/wrappers/monitoring/tests/test_video_recorder.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import tempfile 5 | import numpy as np 6 | 7 | import gym 8 | from gym.wrappers.monitoring.video_recorder import VideoRecorder 9 | 10 | class BrokenRecordableEnv(object): 11 | metadata = {'render.modes': [None, 'rgb_array']} 12 | 13 | def render(self, mode=None): 14 | pass 15 | 16 | class UnrecordableEnv(object): 17 | metadata = {'render.modes': [None]} 18 | 19 | def render(self, mode=None): 20 | pass 21 | 22 | def test_record_simple(): 23 | env = gym.make("CartPole-v1") 24 | rec = VideoRecorder(env) 25 | env.reset() 26 | rec.capture_frame() 27 | rec.close() 28 | assert not rec.empty 29 | assert not rec.broken 30 | assert os.path.exists(rec.path) 31 | f = open(rec.path) 32 | assert os.fstat(f.fileno()).st_size > 100 33 | 34 | def test_no_frames(): 35 | env = BrokenRecordableEnv() 36 | rec = VideoRecorder(env) 37 | rec.close() 38 | assert rec.empty 39 | assert rec.functional 40 | assert not os.path.exists(rec.path) 41 | 42 | def test_record_unrecordable_method(): 43 | env = UnrecordableEnv() 44 | rec = VideoRecorder(env) 45 | assert not rec.enabled 46 | rec.close() 47 | 48 | def test_record_breaking_render_method(): 49 | env = BrokenRecordableEnv() 50 | rec = VideoRecorder(env) 51 | rec.capture_frame() 52 | rec.close() 53 | assert rec.empty 54 | assert rec.broken 55 | assert not os.path.exists(rec.path) 56 | 57 | def test_text_envs(): 58 | env = gym.make('FrozenLake-v0') 59 | video = VideoRecorder(env) 60 | try: 61 | env.reset() 62 | video.capture_frame() 63 | video.close() 64 | finally: 65 | os.remove(video.path) 66 | -------------------------------------------------------------------------------- /gym/vector/tests/test_vector_env.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from gym.vector.tests.utils import make_env 5 | 6 | from gym.vector.async_vector_env import AsyncVectorEnv 7 | from gym.vector.sync_vector_env import SyncVectorEnv 8 | 9 | @pytest.mark.parametrize('shared_memory', [True, False]) 10 | def test_vector_env_equal(shared_memory): 11 | env_fns = [make_env('CubeCrash-v0', i) for i in range(4)] 12 | num_steps = 100 13 | try: 14 | async_env = AsyncVectorEnv(env_fns, shared_memory=shared_memory) 15 | sync_env = SyncVectorEnv(env_fns) 16 | 17 | async_env.seed(0) 18 | sync_env.seed(0) 19 | 20 | assert async_env.num_envs == sync_env.num_envs 21 | assert 
async_env.observation_space == sync_env.observation_space 22 | assert async_env.single_observation_space == sync_env.single_observation_space 23 | assert async_env.action_space == sync_env.action_space 24 | assert async_env.single_action_space == sync_env.single_action_space 25 | 26 | async_observations = async_env.reset() 27 | sync_observations = sync_env.reset() 28 | assert np.all(async_observations == sync_observations) 29 | 30 | for _ in range(num_steps): 31 | actions = async_env.action_space.sample() 32 | assert actions in sync_env.action_space 33 | 34 | async_observations, async_rewards, async_dones, _ = async_env.step(actions) 35 | sync_observations, sync_rewards, sync_dones, _ = sync_env.step(actions) 36 | 37 | assert np.all(async_observations == sync_observations) 38 | assert np.all(async_rewards == sync_rewards) 39 | assert np.all(async_dones == sync_dones) 40 | 41 | finally: 42 | async_env.close() 43 | sync_env.close() 44 | -------------------------------------------------------------------------------- /gym/envs/toy_text/discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym import Env, spaces 4 | from gym.utils import seeding 5 | 6 | def categorical_sample(prob_n, np_random): 7 | """ 8 | Sample from categorical distribution 9 | Each row specifies class probabilities 10 | """ 11 | prob_n = np.asarray(prob_n) 12 | csprob_n = np.cumsum(prob_n) 13 | return (csprob_n > np_random.rand()).argmax() 14 | 15 | 16 | class DiscreteEnv(Env): 17 | 18 | """ 19 | Has the following members 20 | - nS: number of states 21 | - nA: number of actions 22 | - P: transitions (*) 23 | - isd: initial state distribution (**) 24 | 25 | (*) dictionary dict of dicts of lists, where 26 | P[s][a] == [(probability, nextstate, reward, done), ...] 
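        e.g. a stochastic transition might look like P[0][1] == [(0.8, 4, 0.0, False), (0.2, 0, 0.0, False)]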
27 | (**) list or array of length nS 28 | 29 | 30 | """ 31 | def __init__(self, nS, nA, P, isd): 32 | self.P = P 33 | self.isd = isd 34 | self.lastaction = None # for rendering 35 | self.nS = nS 36 | self.nA = nA 37 | 38 | self.action_space = spaces.Discrete(self.nA) 39 | self.observation_space = spaces.Discrete(self.nS) 40 | 41 | self.seed() 42 | self.s = categorical_sample(self.isd, self.np_random) 43 | 44 | def seed(self, seed=None): 45 | self.np_random, seed = seeding.np_random(seed) 46 | return [seed] 47 | 48 | def reset(self): 49 | self.s = categorical_sample(self.isd, self.np_random) 50 | self.lastaction = None 51 | return self.s 52 | 53 | def step(self, a): 54 | transitions = self.P[self.s][a] 55 | i = categorical_sample([t[0] for t in transitions], self.np_random) 56 | p, s, r, d= transitions[i] 57 | self.s = s 58 | self.lastaction = a 59 | return (s, r, d, {"prob" : p}) 60 | -------------------------------------------------------------------------------- /gym/spaces/tuple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .space import Space 3 | 4 | 5 | class Tuple(Space): 6 | """ 7 | A tuple (i.e., product) of simpler spaces 8 | 9 | Example usage: 10 | self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3))) 11 | """ 12 | def __init__(self, spaces): 13 | self.spaces = spaces 14 | for space in spaces: 15 | assert isinstance(space, Space), "Elements of the tuple must be instances of gym.Space" 16 | super(Tuple, self).__init__(None, None) 17 | 18 | def seed(self, seed=None): 19 | [space.seed(seed) for space in self.spaces] 20 | 21 | def sample(self): 22 | return tuple([space.sample() for space in self.spaces]) 23 | 24 | def contains(self, x): 25 | if isinstance(x, list): 26 | x = tuple(x) # Promote list to tuple for contains check 27 | return isinstance(x, tuple) and len(x) == len(self.spaces) and all( 28 | space.contains(part) for (space,part) in zip(self.spaces,x)) 29 | 30 | def __repr__(self): 31 | return "Tuple(" + ", ". 
join([str(s) for s in self.spaces]) + ")" 32 | 33 | def to_jsonable(self, sample_n): 34 | # serialize as list-repr of tuple of vectors 35 | return [space.to_jsonable([sample[i] for sample in sample_n]) \ 36 | for i, space in enumerate(self.spaces)] 37 | 38 | def from_jsonable(self, sample_n): 39 | return [sample for sample in zip(*[space.from_jsonable(sample_n[i]) for i, space in enumerate(self.spaces)])] 40 | 41 | def __getitem__(self, index): 42 | return self.spaces[index] 43 | 44 | def __len__(self): 45 | return len(self.spaces) 46 | 47 | def __eq__(self, other): 48 | return isinstance(other, Tuple) and self.spaces == other.spaces 49 | -------------------------------------------------------------------------------- /gym/envs/mujoco/ant.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, a): 11 | xposbefore = self.get_body_com("torso")[0] 12 | self.do_simulation(a, self.frame_skip) 13 | xposafter = self.get_body_com("torso")[0] 14 | forward_reward = (xposafter - xposbefore)/self.dt 15 | ctrl_cost = .5 * np.square(a).sum() 16 | contact_cost = 0.5 * 1e-3 * np.sum( 17 | np.square(np.clip(self.sim.data.cfrc_ext, -1, 1))) 18 | survive_reward = 1.0 19 | reward = forward_reward - ctrl_cost - contact_cost + survive_reward 20 | state = self.state_vector() 21 | notdone = np.isfinite(state).all() \ 22 | and state[2] >= 0.2 and state[2] <= 1.0 23 | done = not notdone 24 | ob = self._get_obs() 25 | return ob, reward, done, dict( 26 | reward_forward=forward_reward, 27 | reward_ctrl=-ctrl_cost, 28 | reward_contact=-contact_cost, 29 | reward_survive=survive_reward) 30 | 31 | def _get_obs(self): 32 | return np.concatenate([ 33 | self.sim.data.qpos.flat[2:], 34 | self.sim.data.qvel.flat, 35 | np.clip(self.sim.data.cfrc_ext, -1, 1).flat, 36 | ]) 37 | 38 | def reset_model(self): 39 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1) 40 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1 41 | self.set_state(qpos, qvel) 42 | return self._get_obs() 43 | 44 | def viewer_setup(self): 45 | self.viewer.cam.distance = self.model.stat.extent * 0.5 46 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/reach.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/point.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 32 | -------------------------------------------------------------------------------- /docs/creating-environments.md: -------------------------------------------------------------------------------- 1 | # How to create new environments for Gym 2 | 3 | * Create a new repo called gym-foo, which should also be a PIP package. 4 | 5 | * A good example is https://github.com/openai/gym-soccer. 
6 | 7 | * It should have at least the following files: 8 | ```sh 9 | gym-foo/ 10 | README.md 11 | setup.py 12 | gym_foo/ 13 | __init__.py 14 | envs/ 15 | __init__.py 16 | foo_env.py 17 | foo_extrahard_env.py 18 | ``` 19 | 20 | * `gym-foo/setup.py` should have: 21 | 22 | ```python 23 | from setuptools import setup 24 | 25 | setup(name='gym_foo', 26 | version='0.0.1', 27 | install_requires=['gym'] # And any other dependencies foo needs 28 | ) 29 | ``` 30 | 31 | * `gym-foo/gym_foo/__init__.py` should have: 32 | ```python 33 | from gym.envs.registration import register 34 | 35 | register( 36 | id='foo-v0', 37 | entry_point='gym_foo.envs:FooEnv', 38 | ) 39 | register( 40 | id='foo-extrahard-v0', 41 | entry_point='gym_foo.envs:FooExtraHardEnv', 42 | ) 43 | ``` 44 | 45 | * `gym-foo/gym_foo/envs/__init__.py` should have: 46 | ```python 47 | from gym_foo.envs.foo_env import FooEnv 48 | from gym_foo.envs.foo_extrahard_env import FooExtraHardEnv 49 | ``` 50 | 51 | * `gym-foo/gym_foo/envs/foo_env.py` should look something like: 52 | ```python 53 | import gym 54 | from gym import error, spaces, utils 55 | from gym.utils import seeding 56 | 57 | class FooEnv(gym.Env): 58 | metadata = {'render.modes': ['human']} 59 | 60 | def __init__(self): 61 | ... 62 | def step(self, action): 63 | ... 64 | def reset(self): 65 | ... 66 | def render(self, mode='human'): 67 | ... 68 | def close(self): 69 | ... 70 | ``` 71 | 72 | * After you have installed your package with `pip install -e gym-foo`, you can create an instance of the environment with `gym.make('gym_foo:foo-v0')` 73 | -------------------------------------------------------------------------------- /gym/envs/mujoco/humanoidstandup.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco import mujoco_env 2 | from gym import utils 3 | import numpy as np 4 | 5 | class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5) 8 | utils.EzPickle.__init__(self) 9 | 10 | def _get_obs(self): 11 | data = self.sim.data 12 | return np.concatenate([data.qpos.flat[2:], 13 | data.qvel.flat, 14 | data.cinert.flat, 15 | data.cvel.flat, 16 | data.qfrc_actuator.flat, 17 | data.cfrc_ext.flat]) 18 | 19 | def step(self, a): 20 | self.do_simulation(a, self.frame_skip) 21 | pos_after = self.sim.data.qpos[2] 22 | data = self.sim.data 23 | uph_cost = (pos_after - 0) / self.model.opt.timestep 24 | 25 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum() 26 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum() 27 | quad_impact_cost = min(quad_impact_cost, 10) 28 | reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1 29 | 30 | done = bool(False) 31 | return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost) 32 | 33 | def reset_model(self): 34 | c = 0.01 35 | self.set_state( 36 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq), 37 | self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,) 38 | ) 39 | return self._get_obs() 40 | 41 | def viewer_setup(self): 42 | self.viewer.cam.trackbodyid = 1 43 | self.viewer.cam.distance = self.model.stat.extent * 1.0 44 | self.viewer.cam.lookat[2] = 0.8925 45 | self.viewer.cam.elevation = -20 46 | -------------------------------------------------------------------------------- /gym/wrappers/test_transform_reward.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import TransformReward 7 | 8 | 9 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0']) 10 | def test_transform_reward(env_id): 11 | # use case #1: scale 12 | scales = [0.1, 200] 13 | for scale in scales: 14 | env = gym.make(env_id) 15 | wrapped_env = TransformReward(gym.make(env_id), lambda r: scale*r) 16 | action = env.action_space.sample() 17 | 18 | env.seed(0) 19 | env.reset() 20 | wrapped_env.seed(0) 21 | wrapped_env.reset() 22 | 23 | _, reward, _, _ = env.step(action) 24 | _, wrapped_reward, _, _ = wrapped_env.step(action) 25 | 26 | assert wrapped_reward == scale*reward 27 | del env, wrapped_env 28 | 29 | # use case #2: clip 30 | min_r = -0.0005 31 | max_r = 0.0002 32 | env = gym.make(env_id) 33 | wrapped_env = TransformReward(gym.make(env_id), lambda r: np.clip(r, min_r, max_r)) 34 | action = env.action_space.sample() 35 | 36 | env.seed(0) 37 | env.reset() 38 | wrapped_env.seed(0) 39 | wrapped_env.reset() 40 | 41 | _, reward, _, _ = env.step(action) 42 | _, wrapped_reward, _, _ = wrapped_env.step(action) 43 | 44 | assert abs(wrapped_reward) < abs(reward) 45 | assert wrapped_reward == -0.0005 or wrapped_reward == 0.0002 46 | del env, wrapped_env 47 | 48 | # use case #3: sign 49 | env = gym.make(env_id) 50 | wrapped_env = TransformReward(gym.make(env_id), lambda r: np.sign(r)) 51 | 52 | env.seed(0) 53 | env.reset() 54 | wrapped_env.seed(0) 55 | wrapped_env.reset() 56 | 57 | for _ in range(1000): 58 | action = env.action_space.sample() 59 | _, wrapped_reward, done, _ = wrapped_env.step(action) 60 | assert wrapped_reward in [-1.0, 0.0, 1.0] 61 | if done: 62 | break 63 | del env, wrapped_env 64 | -------------------------------------------------------------------------------- /examples/agents/random_agent.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | import gym 5 | from gym import wrappers, logger 6 | 7 | class RandomAgent(object): 8 | """The world's simplest agent!""" 9 | def __init__(self, action_space): 10 | self.action_space = action_space 11 | 12 | def act(self, observation, reward, done): 13 | return self.action_space.sample() 14 | 15 | if __name__ == '__main__': 16 | parser = argparse.ArgumentParser(description=None) 17 | parser.add_argument('env_id', nargs='?', default='CartPole-v0', help='Select the environment to run') 18 | args = parser.parse_args() 19 | 20 | # You can set the level to logger.DEBUG or logger.WARN if you 21 | # want to change the amount of output. 22 | logger.set_level(logger.INFO) 23 | 24 | env = gym.make(args.env_id) 25 | 26 | # You provide the directory to write to (can be an existing 27 | # directory, including one with existing data -- all monitor files 28 | # will be namespaced). You can also dump to a tempdir if you'd 29 | # like: tempfile.mkdtemp(). 30 | outdir = '/tmp/random-agent-results' 31 | env = wrappers.Monitor(env, directory=outdir, force=True) 32 | env.seed(0) 33 | agent = RandomAgent(env.action_space) 34 | 35 | episode_count = 100 36 | reward = 0 37 | done = False 38 | 39 | for i in range(episode_count): 40 | ob = env.reset() 41 | while True: 42 | action = agent.act(ob, reward, done) 43 | ob, reward, done, _ = env.step(action) 44 | if done: 45 | break 46 | # Note there's no env.render() here. 
But the environment still can open window and 47 | # render if asked by env.monitor: it calls env.render('rgb_array') to record video. 48 | # Video is not recorded every episode, see capped_cubic_video_schedule for details. 49 | 50 | # Close the env and write monitor result info to disk 51 | env.close() 52 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/inverted_double_pendulum.xml: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /gym/envs/mujoco/pusher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | import mujoco_py 6 | 7 | class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle): 8 | def __init__(self): 9 | utils.EzPickle.__init__(self) 10 | mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', 5) 11 | 12 | def step(self, a): 13 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm") 14 | vec_2 = self.get_body_com("object") - self.get_body_com("goal") 15 | 16 | reward_near = - np.linalg.norm(vec_1) 17 | reward_dist = - np.linalg.norm(vec_2) 18 | reward_ctrl = - np.square(a).sum() 19 | reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near 20 | 21 | self.do_simulation(a, self.frame_skip) 22 | ob = self._get_obs() 23 | done = False 24 | return ob, reward, done, dict(reward_dist=reward_dist, 25 | reward_ctrl=reward_ctrl) 26 | 27 | def viewer_setup(self): 28 | self.viewer.cam.trackbodyid = -1 29 | self.viewer.cam.distance = 4.0 30 | 31 | def reset_model(self): 32 | qpos = self.init_qpos 33 | 34 | self.goal_pos = np.asarray([0, 0]) 35 | while True: 36 | self.cylinder_pos = np.concatenate([ 37 | self.np_random.uniform(low=-0.3, high=0, size=1), 38 | self.np_random.uniform(low=-0.2, high=0.2, size=1)]) 39 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17: 40 | break 41 | 42 | qpos[-4:-2] = self.cylinder_pos 43 | qpos[-2:] = self.goal_pos 44 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005, 45 | high=0.005, size=self.model.nv) 46 | qvel[-4:] = 0 47 | self.set_state(qpos, qvel) 48 | return self._get_obs() 49 | 50 | def _get_obs(self): 51 | return np.concatenate([ 52 | self.sim.data.qpos.flat[:7], 53 | self.sim.data.qvel.flat[:7], 54 | self.get_body_com("tips_arm"), 55 | self.get_body_com("object"), 56 | self.get_body_com("goal"), 57 | ]) 58 | -------------------------------------------------------------------------------- /gym/vector/tests/test_spaces.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from gym.spaces import Box, MultiDiscrete, Tuple, Dict 5 | from gym.vector.tests.utils import spaces 6 | 7 | from gym.vector.utils.spaces import _BaseGymSpaces, batch_space 8 | 9 | expected_batch_spaces_4 = [ 10 | Box(low=-1., high=1., shape=(4,), dtype=np.float64), 11 | Box(low=0., high=10., shape=(4, 1), dtype=np.float32), 12 | Box(low=np.array([[-1., 0., 0.], [-1., 0., 0.], [-1., 0., 0.], [-1., 0., 0.]]), 13 | high=np.array([[1., 1., 1.], [1., 1., 1.], [1., 1., 1.], [1., 1., 1.]]), dtype=np.float32), 14 | Box(low=np.array([[[-1., 0.], [0., -1.]], [[-1., 0.], [0., -1.]], [[-1., 0.], [0., -1]], 15 | [[-1., 0.], [0., -1.]]]), high=np.ones((4, 2, 2)), dtype=np.float32), 16 | Box(low=0, high=255, shape=(4,), dtype=np.uint8), 17 
| Box(low=0, high=255, shape=(4, 32, 32, 3), dtype=np.uint8), 18 | MultiDiscrete([2, 2, 2, 2]), 19 | Tuple((MultiDiscrete([3, 3, 3, 3]), MultiDiscrete([5, 5, 5, 5]))), 20 | Tuple((MultiDiscrete([7, 7, 7, 7]), Box(low=np.array([[0., -1.], [0., -1.], [0., -1.], [0., -1]]), 21 | high=np.array([[1., 1.], [1., 1.], [1., 1.], [1., 1.]]), dtype=np.float32))), 22 | Box(low=np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]), 23 | high=np.array([[10, 12, 16], [10, 12, 16], [10, 12, 16], [10, 12, 16]]), dtype=np.int64), 24 | Box(low=0, high=1, shape=(4, 19), dtype=np.int8), 25 | Dict({ 26 | 'position': MultiDiscrete([23, 23, 23, 23]), 27 | 'velocity': Box(low=0., high=1., shape=(4, 1), dtype=np.float32) 28 | }), 29 | Dict({ 30 | 'position': Dict({'x': MultiDiscrete([29, 29, 29, 29]), 'y': MultiDiscrete([31, 31, 31, 31])}), 31 | 'velocity': Tuple((MultiDiscrete([37, 37, 37, 37]), Box(low=0, high=255, shape=(4,), dtype=np.uint8))) 32 | }) 33 | ] 34 | 35 | @pytest.mark.parametrize('space,expected_batch_space_4', list(zip(spaces, 36 | expected_batch_spaces_4)), ids=[space.__class__.__name__ for space in spaces]) 37 | def test_batch_space(space, expected_batch_space_4): 38 | batch_space_4 = batch_space(space, n=4) 39 | assert batch_space_4 == expected_batch_space_4 40 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/shared_asset.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /gym/utils/atomic_write.py: -------------------------------------------------------------------------------- 1 | # Based on http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python 2 | 3 | import os 4 | from contextlib import contextmanager 5 | 6 | # We would ideally atomically replace any existing file with the new 7 | # version. However, on Windows there's no Python-only solution prior 8 | # to Python 3.3. (This library includes a C extension to do so: 9 | # https://pypi.python.org/pypi/pyosreplace/0.1.) 10 | # 11 | # Correspondingly, we make a best effort, but on Python < 3.3 use a 12 | # replace method which could result in the file temporarily 13 | # disappearing. 14 | import sys 15 | if sys.version_info >= (3, 3): 16 | # Python 3.3 and up have a native `replace` method 17 | from os import replace 18 | elif sys.platform.startswith("win"): 19 | def replace(src, dst): 20 | # TODO: on Windows, this will raise if the file is in use, 21 | # which is possible. We'll need to make this more robust over 22 | # time. 23 | try: 24 | os.remove(dst) 25 | except OSError: 26 | pass 27 | os.rename(src, dst) 28 | else: 29 | # POSIX rename() is always atomic 30 | from os import rename as replace 31 | 32 | @contextmanager 33 | def atomic_write(filepath, binary=False, fsync=False): 34 | """ Writeable file object that atomically updates a file (using a temporary file). In some cases (namely Python < 3.3 on Windows), this could result in an existing file being temporarily unlinked. 
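    Example (illustrative only; the path and the `data` string are assumptions made
    for this example rather than anything provided by the module):

    ```
    with atomic_write('/tmp/results.json') as f:
        f.write(data)
    ```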
35 | 36 | :param filepath: the file path to be opened 37 | :param binary: whether to open the file in a binary mode instead of textual 38 | :param fsync: whether to force write the file to disk 39 | """ 40 | 41 | tmppath = filepath + '~' 42 | while os.path.isfile(tmppath): 43 | tmppath += '~' 44 | try: 45 | with open(tmppath, 'wb' if binary else 'w') as file: 46 | yield file 47 | if fsync: 48 | file.flush() 49 | os.fsync(file.fileno()) 50 | replace(tmppath, filepath) 51 | finally: 52 | try: 53 | os.remove(tmppath) 54 | except (IOError, OSError): 55 | pass 56 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Configuration for probot-stale - https://github.com/probot/stale 2 | 3 | # Number of days of inactivity before an Issue or Pull Request becomes stale 4 | daysUntilStale: 60 5 | 6 | # Number of days of inactivity before an Issue or Pull Request with the stale label is closed. 7 | # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. 8 | daysUntilClose: 14 9 | 10 | # Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled) 11 | onlyLabels: 12 | - more-information-needed 13 | 14 | # Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable 15 | exemptLabels: 16 | - pinned 17 | - security 18 | - "[Status] Maybe Later" 19 | 20 | # Set to true to ignore issues in a project (defaults to false) 21 | exemptProjects: true 22 | 23 | # Set to true to ignore issues in a milestone (defaults to false) 24 | exemptMilestones: true 25 | 26 | # Set to true to ignore issues with an assignee (defaults to false) 27 | exemptAssignees: true 28 | 29 | # Label to use when marking as stale 30 | staleLabel: stale 31 | 32 | # Comment to post when marking as stale. Set to `false` to disable 33 | markComment: > 34 | This issue has been automatically marked as stale because it has not had 35 | recent activity. It will be closed if no further activity occurs. Thank you 36 | for your contributions. 37 | 38 | # Comment to post when removing the stale label. 39 | # unmarkComment: > 40 | # Your comment here. 41 | 42 | # Comment to post when closing a stale Issue or Pull Request. 43 | # closeComment: > 44 | # Your comment here. 45 | 46 | # Limit the number of actions per hour, from 1-30. Default is 30 47 | limitPerRun: 30 48 | 49 | # Limit to only `issues` or `pulls` 50 | only: issues 51 | 52 | # Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': 53 | # pulls: 54 | # daysUntilStale: 30 55 | # markComment: > 56 | # This pull request has been automatically marked as stale because it has not had 57 | # recent activity. It will be closed if no further activity occurs. Thank you 58 | # for your contributions. 
59 | 60 | # issues: 61 | # exemptLabels: 62 | # - confirmed -------------------------------------------------------------------------------- /gym/envs/robotics/hand_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import numpy as np 4 | 5 | import gym 6 | from gym import error, spaces 7 | from gym.utils import seeding 8 | from gym.envs.robotics import robot_env 9 | 10 | 11 | class HandEnv(robot_env.RobotEnv): 12 | def __init__(self, model_path, n_substeps, initial_qpos, relative_control): 13 | self.relative_control = relative_control 14 | 15 | super(HandEnv, self).__init__( 16 | model_path=model_path, n_substeps=n_substeps, n_actions=20, 17 | initial_qpos=initial_qpos) 18 | 19 | # RobotEnv methods 20 | # ---------------------------- 21 | 22 | def _set_action(self, action): 23 | assert action.shape == (20,) 24 | 25 | ctrlrange = self.sim.model.actuator_ctrlrange 26 | actuation_range = (ctrlrange[:, 1] - ctrlrange[:, 0]) / 2. 27 | if self.relative_control: 28 | actuation_center = np.zeros_like(action) 29 | for i in range(self.sim.data.ctrl.shape[0]): 30 | actuation_center[i] = self.sim.data.get_joint_qpos( 31 | self.sim.model.actuator_names[i].replace(':A_', ':')) 32 | for joint_name in ['FF', 'MF', 'RF', 'LF']: 33 | act_idx = self.sim.model.actuator_name2id( 34 | 'robot0:A_{}J1'.format(joint_name)) 35 | actuation_center[act_idx] += self.sim.data.get_joint_qpos( 36 | 'robot0:{}J0'.format(joint_name)) 37 | else: 38 | actuation_center = (ctrlrange[:, 1] + ctrlrange[:, 0]) / 2. 39 | self.sim.data.ctrl[:] = actuation_center + action * actuation_range 40 | self.sim.data.ctrl[:] = np.clip(self.sim.data.ctrl, ctrlrange[:, 0], ctrlrange[:, 1]) 41 | 42 | def _viewer_setup(self): 43 | body_id = self.sim.model.body_name2id('robot0:palm') 44 | lookat = self.sim.data.body_xpos[body_id] 45 | for idx, value in enumerate(lookat): 46 | self.viewer.cam.lookat[idx] = value 47 | self.viewer.cam.distance = 0.5 48 | self.viewer.cam.azimuth = 55. 49 | self.viewer.cam.elevation = -25. 50 | 51 | def render(self, mode='human', width=500, height=500): 52 | return super(HandEnv, self).render(mode, width, height) 53 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import sys, os.path 3 | 4 | # Don't import gym module here, since deps may not be installed 5 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'gym')) 6 | from version import VERSION 7 | 8 | # Environment-specific dependencies. 9 | extras = { 10 | 'atari': ['atari_py~=0.2.0', 'Pillow', 'opencv-python'], 11 | 'box2d': ['box2d-py~=2.3.5'], 12 | 'classic_control': [], 13 | 'mujoco': ['mujoco_py>=1.50, <2.0', 'imageio'], 14 | 'robotics': ['mujoco_py>=1.50, <2.0', 'imageio'], 15 | } 16 | 17 | # Meta dependency groups. 
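# 'all' simply aggregates every environment-specific extra above, so e.g.
# `pip install gym[all]` pulls in the dependencies for every environment family at once.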
18 | extras['all'] = [item for group in extras.values() for item in group] 19 | 20 | setup(name='gym', 21 | version=VERSION, 22 | description='The OpenAI Gym: A toolkit for developing and comparing your reinforcement learning agents.', 23 | url='https://github.com/openai/gym', 24 | author='OpenAI', 25 | author_email='gym@openai.com', 26 | license='', 27 | packages=[package for package in find_packages() 28 | if package.startswith('gym')], 29 | zip_safe=False, 30 | install_requires=[ 31 | 'scipy', 'numpy>=1.10.4', 'six', 'pyglet>=1.2.0,<=1.3.2', 'cloudpickle~=1.2.0', 32 | 'enum34~=1.1.6;python_version<"3.4"', 'opencv-python' 33 | ], 34 | extras_require=extras, 35 | package_data={'gym': [ 36 | 'envs/mujoco/assets/*.xml', 37 | 'envs/classic_control/assets/*.png', 38 | 'envs/robotics/assets/LICENSE.md', 39 | 'envs/robotics/assets/fetch/*.xml', 40 | 'envs/robotics/assets/hand/*.xml', 41 | 'envs/robotics/assets/stls/fetch/*.stl', 42 | 'envs/robotics/assets/stls/hand/*.stl', 43 | 'envs/robotics/assets/textures/*.png'] 44 | }, 45 | tests_require=['pytest', 'mock'], 46 | python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*', 47 | classifiers=[ 48 | 'Programming Language :: Python :: 2', 49 | 'Programming Language :: Python :: 2.7', 50 | 'Programming Language :: Python :: 3', 51 | 'Programming Language :: Python :: 3.5', 52 | 'Programming Language :: Python :: 3.6', 53 | 'Programming Language :: Python :: 3.7', 54 | ], 55 | ) 56 | -------------------------------------------------------------------------------- /gym/envs/mujoco/humanoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym.envs.mujoco import mujoco_env 3 | from gym import utils 4 | 5 | def mass_center(model, sim): 6 | mass = np.expand_dims(model.body_mass, 1) 7 | xpos = sim.data.xipos 8 | return (np.sum(mass * xpos, 0) / np.sum(mass))[0] 9 | 10 | class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle): 11 | def __init__(self): 12 | mujoco_env.MujocoEnv.__init__(self, 'humanoid.xml', 5) 13 | utils.EzPickle.__init__(self) 14 | 15 | def _get_obs(self): 16 | data = self.sim.data 17 | return np.concatenate([data.qpos.flat[2:], 18 | data.qvel.flat, 19 | data.cinert.flat, 20 | data.cvel.flat, 21 | data.qfrc_actuator.flat, 22 | data.cfrc_ext.flat]) 23 | 24 | def step(self, a): 25 | pos_before = mass_center(self.model, self.sim) 26 | self.do_simulation(a, self.frame_skip) 27 | pos_after = mass_center(self.model, self.sim) 28 | alive_bonus = 5.0 29 | data = self.sim.data 30 | lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt 31 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum() 32 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum() 33 | quad_impact_cost = min(quad_impact_cost, 10) 34 | reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus 35 | qpos = self.sim.data.qpos 36 | done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0)) 37 | return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost) 38 | 39 | def reset_model(self): 40 | c = 0.01 41 | self.set_state( 42 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq), 43 | self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,) 44 | ) 45 | return self._get_obs() 46 | 47 | def viewer_setup(self): 48 | self.viewer.cam.trackbodyid = 1 49 | self.viewer.cam.distance = self.model.stat.extent * 1.0 50 | self.viewer.cam.lookat[2] = 2.0 51 | 
self.viewer.cam.elevation = -20 52 | -------------------------------------------------------------------------------- /gym/utils/closer.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import threading 3 | import weakref 4 | 5 | class Closer(object): 6 | """A registry that ensures your objects get closed, whether manually, 7 | upon garbage collection, or upon exit. To work properly, your 8 | objects need to cooperate and do something like the following: 9 | 10 | ``` 11 | closer = Closer() 12 | class Example(object): 13 | def __init__(self): 14 | self._id = closer.register(self) 15 | 16 | def close(self): 17 | # Probably worth making idempotent too! 18 | ... 19 | closer.unregister(self._id) 20 | 21 | def __del__(self): 22 | self.close() 23 | ``` 24 | 25 | That is, your objects should: 26 | 27 | - register() themselves and save the returned ID 28 | - unregister() themselves upon close() 29 | - include a __del__ method which close()'s the object 30 | """ 31 | 32 | def __init__(self, atexit_register=True): 33 | self.lock = threading.Lock() 34 | self.next_id = -1 35 | self.closeables = weakref.WeakValueDictionary() 36 | 37 | if atexit_register: 38 | atexit.register(self.close) 39 | 40 | def generate_next_id(self): 41 | with self.lock: 42 | self.next_id += 1 43 | return self.next_id 44 | 45 | def register(self, closeable): 46 | """Registers an object with a 'close' method. 47 | 48 | Returns: 49 | int: The registration ID of this object. It is the caller's responsibility to save this ID if early closing is desired. 50 | """ 51 | assert hasattr(closeable, 'close'), 'No close method for {}'.format(closeable) 52 | 53 | next_id = self.generate_next_id() 54 | self.closeables[next_id] = closeable 55 | return next_id 56 | 57 | def unregister(self, id): 58 | assert id is not None 59 | if id in self.closeables: 60 | del self.closeables[id] 61 | 62 | def close(self): 63 | # Explicitly fetch all monitors first so that they can't disappear while 64 | # we iterate. cf. 
http://stackoverflow.com/a/12429620 65 | closeables = list(self.closeables.values()) 66 | for closeable in closeables: 67 | closeable.close() 68 | -------------------------------------------------------------------------------- /gym/envs/mujoco/thrower.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | utils.EzPickle.__init__(self) 8 | self._ball_hit_ground = False 9 | self._ball_hit_location = None 10 | mujoco_env.MujocoEnv.__init__(self, 'thrower.xml', 5) 11 | 12 | def step(self, a): 13 | ball_xy = self.get_body_com("ball")[:2] 14 | goal_xy = self.get_body_com("goal")[:2] 15 | 16 | if not self._ball_hit_ground and self.get_body_com("ball")[2] < -0.25: 17 | self._ball_hit_ground = True 18 | self._ball_hit_location = self.get_body_com("ball") 19 | 20 | if self._ball_hit_ground: 21 | ball_hit_xy = self._ball_hit_location[:2] 22 | reward_dist = -np.linalg.norm(ball_hit_xy - goal_xy) 23 | else: 24 | reward_dist = -np.linalg.norm(ball_xy - goal_xy) 25 | reward_ctrl = - np.square(a).sum() 26 | 27 | reward = reward_dist + 0.002 * reward_ctrl 28 | self.do_simulation(a, self.frame_skip) 29 | ob = self._get_obs() 30 | done = False 31 | return ob, reward, done, dict(reward_dist=reward_dist, 32 | reward_ctrl=reward_ctrl) 33 | 34 | def viewer_setup(self): 35 | self.viewer.cam.trackbodyid = 0 36 | self.viewer.cam.distance = 4.0 37 | 38 | def reset_model(self): 39 | self._ball_hit_ground = False 40 | self._ball_hit_location = None 41 | 42 | qpos = self.init_qpos 43 | self.goal = np.array([self.np_random.uniform(low=-0.3, high=0.3), 44 | self.np_random.uniform(low=-0.3, high=0.3)]) 45 | 46 | qpos[-9:-7] = self.goal 47 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005, 48 | high=0.005, size=self.model.nv) 49 | qvel[7:] = 0 50 | self.set_state(qpos, qvel) 51 | return self._get_obs() 52 | 53 | def _get_obs(self): 54 | return np.concatenate([ 55 | self.sim.data.qpos.flat[:7], 56 | self.sim.data.qvel.flat[:7], 57 | self.get_body_com("r_wrist_roll_link"), 58 | self.get_body_com("ball"), 59 | self.get_body_com("goal"), 60 | ]) 61 | -------------------------------------------------------------------------------- /gym/vector/tests/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gym 3 | import time 4 | 5 | from gym.spaces import Box, Discrete, MultiDiscrete, MultiBinary, Tuple, Dict 6 | 7 | spaces = [ 8 | Box(low=np.array(-1.), high=np.array(1.), dtype=np.float64), 9 | Box(low=np.array([0.]), high=np.array([10.]), dtype=np.float32), 10 | Box(low=np.array([-1., 0., 0.]), high=np.array([1., 1., 1.]), dtype=np.float32), 11 | Box(low=np.array([[-1., 0.], [0., -1.]]), high=np.ones((2, 2)), dtype=np.float32), 12 | Box(low=0, high=255, shape=(), dtype=np.uint8), 13 | Box(low=0, high=255, shape=(32, 32, 3), dtype=np.uint8), 14 | Discrete(2), 15 | Tuple((Discrete(3), Discrete(5))), 16 | Tuple((Discrete(7), Box(low=np.array([0., -1.]), high=np.array([1., 1.]), dtype=np.float32))), 17 | MultiDiscrete([11, 13, 17]), 18 | MultiBinary(19), 19 | Dict({ 20 | 'position': Discrete(23), 21 | 'velocity': Box(low=np.array([0.]), high=np.array([1.]), dtype=np.float32) 22 | }), 23 | Dict({ 24 | 'position': Dict({'x': Discrete(29), 'y': Discrete(31)}), 25 | 'velocity': Tuple((Discrete(37), Box(low=0, high=255, shape=(), dtype=np.uint8))) 
26 | }) 27 | ] 28 | 29 | HEIGHT, WIDTH = 64, 64 30 | 31 | class UnittestSlowEnv(gym.Env): 32 | def __init__(self, slow_reset=0.3): 33 | super(UnittestSlowEnv, self).__init__() 34 | self.slow_reset = slow_reset 35 | self.observation_space = Box(low=0, high=255, 36 | shape=(HEIGHT, WIDTH, 3), dtype=np.uint8) 37 | self.action_space = Box(low=0., high=1., shape=(), dtype=np.float32) 38 | 39 | def reset(self): 40 | if self.slow_reset > 0: 41 | time.sleep(self.slow_reset) 42 | return self.observation_space.sample() 43 | 44 | def step(self, action): 45 | time.sleep(action) 46 | observation = self.observation_space.sample() 47 | reward, done = 0., False 48 | return observation, reward, done, {} 49 | 50 | def make_env(env_name, seed): 51 | def _make(): 52 | env = gym.make(env_name) 53 | env.seed(seed) 54 | return env 55 | return _make 56 | 57 | def make_slow_env(slow_reset, seed): 58 | def _make(): 59 | env = UnittestSlowEnv(slow_reset=slow_reset) 60 | env.seed(seed) 61 | return env 62 | return _make 63 | -------------------------------------------------------------------------------- /examples/scripts/sim_env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import gym 3 | from gym import spaces, envs 4 | import argparse 5 | import numpy as np 6 | import itertools 7 | import time 8 | from builtins import input 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("env") 12 | parser.add_argument("--mode", choices=["noop", "random", "human"], 13 | default="random") 14 | parser.add_argument("--max_steps", type=int, default=0) 15 | parser.add_argument("--fps",type=float) 16 | parser.add_argument("--once", action="store_true") 17 | parser.add_argument("--ignore_done", action="store_true") 18 | args = parser.parse_args() 19 | 20 | env = envs.make(args.env) 21 | ac_space = env.action_space 22 | 23 | fps = args.fps or env.metadata.get('video.frames_per_second') or 100 24 | if args.max_steps == 0: args.max_steps = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps'] 25 | 26 | while True: 27 | env.reset() 28 | env.render(mode='human') 29 | print("Starting a new trajectory") 30 | for t in range(args.max_steps) if args.max_steps else itertools.count(): 31 | done = False 32 | if args.mode == "noop": 33 | if isinstance(ac_space, spaces.Box): 34 | a = np.zeros(ac_space.shape) 35 | elif isinstance(ac_space, spaces.Discrete): 36 | a = 0 37 | else: 38 | raise NotImplementedError("noop not implemented for class {}".format(type(ac_space))) 39 | time.sleep(1.0/fps) 40 | elif args.mode == "random": 41 | a = ac_space.sample() 42 | time.sleep(1.0/fps) 43 | elif args.mode == "human": 44 | a = input("type action from {0,...,%i} and press enter: "%(ac_space.n-1)) 45 | try: 46 | a = int(a) 47 | except ValueError: 48 | print("WARNING: ignoring illegal action '{}'.".format(a)) 49 | a = 0 50 | if a >= ac_space.n: 51 | print("WARNING: ignoring illegal action {}.".format(a)) 52 | a = 0 53 | _, _, done, _ = env.step(a) 54 | 55 | env.render() 56 | if done and not args.ignore_done: 57 | break 58 | print("Done after {} steps".format(t+1)) 59 | if args.once: 60 | break 61 | else: 62 | input("Press enter to continue") 63 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/manipulate_pen.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /gym/envs/tests/test_registration.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import gym 3 | from gym import error, envs 4 | from gym.envs import registration 5 | from gym.envs.classic_control import cartpole 6 | 7 | class ArgumentEnv(gym.Env): 8 | def __init__(self, arg1, arg2, arg3): 9 | self.arg1 = arg1 10 | self.arg2 = arg2 11 | self.arg3 = arg3 12 | 13 | gym.register( 14 | id='test.ArgumentEnv-v0', 15 | entry_point='gym.envs.tests.test_registration:ArgumentEnv', 16 | kwargs={ 17 | 'arg1': 'arg1', 18 | 'arg2': 'arg2', 19 | } 20 | ) 21 | 22 | def test_make(): 23 | env = envs.make('CartPole-v0') 24 | assert env.spec.id == 'CartPole-v0' 25 | assert isinstance(env.unwrapped, cartpole.CartPoleEnv) 26 | 27 | def test_make_with_kwargs(): 28 | env = envs.make('test.ArgumentEnv-v0', arg2='override_arg2', arg3='override_arg3') 29 | assert env.spec.id == 'test.ArgumentEnv-v0' 30 | assert isinstance(env.unwrapped, ArgumentEnv) 31 | assert env.arg1 == 'arg1' 32 | assert env.arg2 == 'override_arg2' 33 | assert env.arg3 == 'override_arg3' 34 | 35 | def test_make_deprecated(): 36 | try: 37 | envs.make('Humanoid-v0') 38 | except error.Error: 39 | pass 40 | else: 41 | assert False 42 | 43 | def test_spec(): 44 | spec = envs.spec('CartPole-v0') 45 | assert spec.id == 'CartPole-v0' 46 | 47 | def test_missing_lookup(): 48 | registry = registration.EnvRegistry() 49 | registry.register(id='Test-v0', entry_point=None) 50 | registry.register(id='Test-v15', entry_point=None) 51 | registry.register(id='Test-v9', entry_point=None) 52 | registry.register(id='Other-v100', entry_point=None) 53 | try: 54 | registry.spec('Test-v1') # must match an env name but not the version above 55 | except error.DeprecatedEnv: 56 | pass 57 | else: 58 | assert False 59 | 60 | try: 61 | registry.spec('Unknown-v1') 62 | except error.UnregisteredEnv: 63 | pass 64 | else: 65 | assert False 66 | 67 | def test_malformed_lookup(): 68 | registry = registration.EnvRegistry() 69 | try: 70 | registry.spec(u'“Breakout-v0”') 71 | except error.Error as e: 72 | assert 'malformed environment ID' in '{}'.format(e), 'Unexpected message: {}'.format(e) 73 | else: 74 | assert False 75 | -------------------------------------------------------------------------------- /gym/vector/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | from collections.abc import Iterable 3 | except ImportError: 4 | Iterable = (tuple, list) 5 | 6 | from gym.vector.async_vector_env import AsyncVectorEnv 7 | from gym.vector.sync_vector_env import SyncVectorEnv 8 | from gym.vector.vector_env import VectorEnv 9 | 10 | __all__ = ['AsyncVectorEnv', 'SyncVectorEnv', 'VectorEnv', 'make'] 11 | 12 | def make(id, num_envs=1, asynchronous=True, wrappers=None, **kwargs): 13 | """Create a vectorized environment from multiple copies of an environment, 14 | from its id 15 | 16 | Parameters 17 | ---------- 18 | id : str 19 | The environment ID. This must be a valid ID from the registry. 20 | 21 | num_envs : int 22 | Number of copies of the environment. 23 | 24 | asynchronous : bool (default: `True`) 25 | If `True`, wraps the environments in an `AsyncVectorEnv` (which uses 26 | `multiprocessing` to run the environments in parallel). If `False`, 27 | wraps the environments in a `SyncVectorEnv`. 
28 | 29 | wrappers : Callable or Iterable of Callables (default: `None`) 30 | If not `None`, then apply the wrappers to each internal 31 | environment during creation. 32 | 33 | Returns 34 | ------- 35 | env : `gym.vector.VectorEnv` instance 36 | The vectorized environment. 37 | 38 | Example 39 | ------- 40 | >>> import gym 41 | >>> env = gym.vector.make('CartPole-v1', 3) 42 | >>> env.reset() 43 | array([[-0.04456399, 0.04653909, 0.01326909, -0.02099827], 44 | [ 0.03073904, 0.00145001, -0.03088818, -0.03131252], 45 | [ 0.03468829, 0.01500225, 0.01230312, 0.01825218]], 46 | dtype=float32) 47 | """ 48 | from gym.envs import make as make_ 49 | def _make_env(): 50 | env = make_(id, **kwargs) 51 | if wrappers is not None: 52 | if callable(wrappers): 53 | env = wrappers(env) 54 | elif isinstance(wrappers, Iterable) and all([callable(w) for w in wrappers]): 55 | for wrapper in wrappers: 56 | env = wrapper(env) 57 | else: 58 | raise NotImplementedError 59 | return env 60 | env_fns = [_make_env for _ in range(num_envs)] 61 | return AsyncVectorEnv(env_fns) if asynchronous else SyncVectorEnv(env_fns) 62 | -------------------------------------------------------------------------------- /gym/envs/toy_text/nchain.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | class NChainEnv(gym.Env): 6 | """n-Chain environment 7 | 8 | This game presents moves along a linear chain of states, with two actions: 9 | 0) forward, which moves along the chain but returns no reward 10 | 1) backward, which returns to the beginning and has a small reward 11 | 12 | The end of the chain, however, presents a large reward, and by moving 13 | 'forward' at the end of the chain this large reward can be repeated. 14 | 15 | At each action, there is a small probability that the agent 'slips' and the 16 | opposite transition is instead taken. 17 | 18 | The observed state is the current state in the chain (0 to n-1). 
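    Example (illustrative; the slip probability is ignored here and the constructor
    arguments shown are simply the defaults):

    ```
    env = NChainEnv(n=5, slip=0.2, small=2, large=10)
    state = env.reset()                   # always starts at state 0
    state, reward, done, _ = env.step(0)  # 'forward': moves to state 1, reward 0
    state, reward, done, _ = env.step(1)  # 'backward': returns to state 0, reward `small`
    ```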
19 | 20 | This environment is described in section 6.1 of: 21 | A Bayesian Framework for Reinforcement Learning by Malcolm Strens (2000) 22 | http://ceit.aut.ac.ir/~shiry/lecture/machine-learning/papers/BRL-2000.pdf 23 | """ 24 | def __init__(self, n=5, slip=0.2, small=2, large=10): 25 | self.n = n 26 | self.slip = slip # probability of 'slipping' an action 27 | self.small = small # payout for 'backwards' action 28 | self.large = large # payout at end of chain for 'forwards' action 29 | self.state = 0 # Start at beginning of the chain 30 | self.action_space = spaces.Discrete(2) 31 | self.observation_space = spaces.Discrete(self.n) 32 | self.seed() 33 | 34 | def seed(self, seed=None): 35 | self.np_random, seed = seeding.np_random(seed) 36 | return [seed] 37 | 38 | def step(self, action): 39 | assert self.action_space.contains(action) 40 | if self.np_random.rand() < self.slip: 41 | action = not action # agent slipped, reverse action taken 42 | if action: # 'backwards': go back to the beginning, get small reward 43 | reward = self.small 44 | self.state = 0 45 | elif self.state < self.n - 1: # 'forwards': go up along the chain 46 | reward = 0 47 | self.state += 1 48 | else: # 'forwards': stay at the end of the chain, collect large reward 49 | reward = self.large 50 | done = False 51 | return self.state, reward, done, {} 52 | 53 | def reset(self): 54 | self.state = 0 55 | return self.state 56 | -------------------------------------------------------------------------------- /gym/spaces/multi_discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .space import Space 3 | 4 | 5 | class MultiDiscrete(Space): 6 | """ 7 | - The multi-discrete action space consists of a series of discrete action spaces with different number of actions in eachs 8 | - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space 9 | - It is parametrized by passing an array of positive integers specifying number of actions for each discrete action space 10 | 11 | Note: Some environment wrappers assume a value of 0 always represents the NOOP action. 12 | 13 | e.g. 
Nintendo Game Controller 14 | - Can be conceptualized as 3 discrete action spaces: 15 | 16 | 1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4 17 | 2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 18 | 3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 19 | 20 | - Can be initialized as 21 | 22 | MultiDiscrete([ 5, 2, 2 ]) 23 | 24 | """ 25 | def __init__(self, nvec): 26 | 27 | """ 28 | nvec: vector of counts of each categorical variable 29 | """ 30 | assert (np.array(nvec) > 0).all(), 'nvec (counts) have to be positive' 31 | self.nvec = np.asarray(nvec, dtype=np.int64) 32 | 33 | super(MultiDiscrete, self).__init__(self.nvec.shape, np.int64) 34 | 35 | def sample(self): 36 | return (self.np_random.random_sample(self.nvec.shape)*self.nvec).astype(self.dtype) 37 | 38 | def contains(self, x): 39 | if isinstance(x, list): 40 | x = np.array(x) # Promote list to array for contains check 41 | # if nvec is uint32 and space dtype is uint32, then 0 <= x < self.nvec guarantees that x 42 | # is within correct bounds for space dtype (even though x does not have to be unsigned) 43 | return x.shape == self.shape and (0 <= x).all() and (x < self.nvec).all() 44 | 45 | def to_jsonable(self, sample_n): 46 | return [sample.tolist() for sample in sample_n] 47 | 48 | def from_jsonable(self, sample_n): 49 | return np.array(sample_n) 50 | 51 | def __repr__(self): 52 | return "MultiDiscrete({})".format(self.nvec) 53 | 54 | def __eq__(self, other): 55 | return isinstance(other, MultiDiscrete) and np.all(self.nvec == other.nvec) 56 | -------------------------------------------------------------------------------- /gym/wrappers/test_atari_preprocessing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gym 3 | from gym.wrappers import AtariPreprocessing 4 | import pytest 5 | 6 | pytest.importorskip('atari_py') 7 | 8 | 9 | @pytest.fixture(scope='module') 10 | def env_fn(): 11 | return lambda: gym.make('PongNoFrameskip-v4') 12 | 13 | 14 | def test_atari_preprocessing_grayscale(env_fn): 15 | import cv2 16 | env1 = env_fn() 17 | env2 = AtariPreprocessing(env_fn(), screen_size=84, grayscale_obs=True, frame_skip=1, noop_max=0) 18 | env3 = AtariPreprocessing(env_fn(), screen_size=84, grayscale_obs=False, frame_skip=1, noop_max=0) 19 | env1.seed(0) 20 | env2.seed(0) 21 | env3.seed(0) 22 | obs1 = env1.reset() 23 | obs2 = env2.reset() 24 | obs3 = env3.reset() 25 | assert obs1.shape == (210, 160, 3) 26 | assert obs2.shape == (84, 84) 27 | assert obs3.shape == (84, 84, 3) 28 | assert np.allclose(obs3, cv2.resize(obs1, (84, 84), interpolation=cv2.INTER_AREA)) 29 | obs3_gray = cv2.cvtColor(obs3, cv2.COLOR_RGB2GRAY) 30 | # the edges of the numbers do not render quite the same in the grayscale, so we ignore them 31 | assert np.allclose(obs2[10:38], obs3_gray[10:38]) 32 | # the paddle also do not render quite the same 33 | assert np.allclose(obs2[44:], obs3_gray[44:]) 34 | 35 | env1.close() 36 | env2.close() 37 | env3.close() 38 | 39 | 40 | def test_atari_preprocessing_scale(env_fn): 41 | # arbitrarily chosen number for stepping into env. 
and ensuring all observations are in the required range 42 | max_test_steps = 10 43 | 44 | for grayscale in [True, False]: 45 | for scaled in [True, False]: 46 | env = AtariPreprocessing(env_fn(), screen_size=84, grayscale_obs=grayscale, scale_obs=scaled, 47 | frame_skip=1, noop_max=0) 48 | obs = env.reset().flatten() 49 | done, step_i = False, 0 50 | max_obs = 1 if scaled else 255 51 | assert (0 <= obs).all() and (obs <= max_obs).all(), 'Obs. must be in range [0,{}]'.format(max_obs) 52 | while not done or step_i <= max_test_steps: 53 | obs, _, done, _ = env.step(env.action_space.sample()) 54 | obs = obs.flatten() 55 | assert (0 <= obs).all() and (obs <= max_obs).all(), 'Obs. must be in range [0,{}]'.format(max_obs) 56 | step_i += 1 57 | 58 | env.close() 59 | -------------------------------------------------------------------------------- /gym/envs/tests/test_envs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from gym import envs 5 | from gym.envs.tests.spec_list import spec_list 6 | 7 | # This runs a smoketest on each official registered env. We may want 8 | # to try also running environments which are not officially registered 9 | # envs. 10 | @pytest.mark.parametrize("spec", spec_list) 11 | def test_env(spec): 12 | # Capture warnings 13 | with pytest.warns(None) as warnings: 14 | env = spec.make() 15 | 16 | # Check that dtype is explicitly declared for gym.Box spaces 17 | for warning_msg in warnings: 18 | assert not 'autodetected dtype' in str(warning_msg.message) 19 | 20 | ob_space = env.observation_space 21 | act_space = env.action_space 22 | ob = env.reset() 23 | assert ob_space.contains(ob), 'Reset observation: {!r} not in space'.format(ob) 24 | a = act_space.sample() 25 | observation, reward, done, _info = env.step(a) 26 | assert ob_space.contains(observation), 'Step observation: {!r} not in space'.format(observation) 27 | assert np.isscalar(reward), "{} is not a scalar for {}".format(reward, env) 28 | assert isinstance(done, bool), "Expected {} to be a boolean".format(done) 29 | 30 | for mode in env.metadata.get('render.modes', []): 31 | env.render(mode=mode) 32 | 33 | # Make sure we can render the environment after close. 
34 | for mode in env.metadata.get('render.modes', []): 35 | env.render(mode=mode) 36 | 37 | env.close() 38 | 39 | # Run a longer rollout on some environments 40 | def test_random_rollout(): 41 | for env in [envs.make('CartPole-v0'), envs.make('FrozenLake-v0')]: 42 | agent = lambda ob: env.action_space.sample() 43 | ob = env.reset() 44 | for _ in range(10): 45 | assert env.observation_space.contains(ob) 46 | a = agent(ob) 47 | assert env.action_space.contains(a) 48 | (ob, _reward, done, _info) = env.step(a) 49 | if done: break 50 | env.close() 51 | 52 | 53 | def test_env_render_result_is_immutable(): 54 | from six import string_types 55 | environs = [ 56 | envs.make('Taxi-v3'), 57 | envs.make('FrozenLake-v0'), 58 | envs.make('Reverse-v0'), 59 | ] 60 | 61 | for env in environs: 62 | env.reset() 63 | output = env.render(mode='ansi') 64 | assert isinstance(output, string_types) 65 | env.close() 66 | -------------------------------------------------------------------------------- /gym/envs/toy_text/hotter_colder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | from gym import spaces 5 | from gym.utils import seeding 6 | 7 | 8 | class HotterColder(gym.Env): 9 | """Hotter Colder 10 | The goal of hotter colder is to guess closer to a randomly selected number 11 | 12 | After each step the agent receives an observation of: 13 | 0 - No guess yet submitted (only after reset) 14 | 1 - Guess is lower than the target 15 | 2 - Guess is equal to the target 16 | 3 - Guess is higher than the target 17 | 18 | The rewards is calculated as: 19 | (min(action, self.number) + self.range) / (max(action, self.number) + self.range) 20 | 21 | Ideally an agent will be able to recognise the 'scent' of a higher reward and 22 | increase the rate in which is guesses in that direction until the reward reaches 23 | its maximum 24 | """ 25 | def __init__(self): 26 | self.range = 1000 # +/- value the randomly select number can be between 27 | self.bounds = 2000 # Action space bounds 28 | 29 | self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]), 30 | dtype=np.float32) 31 | self.observation_space = spaces.Discrete(4) 32 | 33 | self.number = 0 34 | self.guess_count = 0 35 | self.guess_max = 200 36 | self.observation = 0 37 | 38 | self.seed() 39 | self.reset() 40 | 41 | def seed(self, seed=None): 42 | self.np_random, seed = seeding.np_random(seed) 43 | return [seed] 44 | 45 | def step(self, action): 46 | assert self.action_space.contains(action) 47 | 48 | if action < self.number: 49 | self.observation = 1 50 | 51 | elif action == self.number: 52 | self.observation = 2 53 | 54 | elif action > self.number: 55 | self.observation = 3 56 | 57 | reward = ((min(action, self.number) + self.bounds) / (max(action, self.number) + self.bounds)) ** 2 58 | 59 | self.guess_count += 1 60 | done = self.guess_count >= self.guess_max 61 | 62 | return self.observation, reward[0], done, {"number": self.number, "guesses": self.guess_count} 63 | 64 | def reset(self): 65 | self.number = self.np_random.uniform(-self.range, self.range) 66 | self.guess_count = 0 67 | self.observation = 0 68 | return self.observation 69 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/manipulate_pen_touch_sensors.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 
24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/swimmer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 40 | -------------------------------------------------------------------------------- /docs/agents.md: -------------------------------------------------------------------------------- 1 | # Agents 2 | 3 | An "agent" describes the method of running an RL algorithm against an environment in the gym. The agent may contain the algorithm itself or simply provide an integration between an algorithm and the gym environments. 4 | 5 | ## RandomAgent 6 | 7 | A sample agent located in this repo at `gym/examples/agents/random_agent.py`. This simple agent leverages the environments ability to produce a random valid action and does so for each step. 8 | 9 | ## cem.py 10 | 11 | A generic Cross-Entropy agent located in this repo at `gym/examples/agents/cem.py`. This agent defaults to 10 iterations of 25 episodes considering the top 20% "elite". 12 | 13 | ## dqn 14 | 15 | This is a very basic DQN (with experience replay) implementation, which uses OpenAI's gym environment and Keras/Theano neural networks. [/sherjilozair/dqn](https://github.com/sherjilozair/dqn) 16 | 17 | ## Simple DQN 18 | 19 | Simple, fast and easy to extend DQN implementation using [Neon](https://github.com/NervanaSystems/neon) deep learning library. Comes with out-of-box tools to train, test and visualize models. For details see [this blog post](https://www.nervanasys.com/deep-reinforcement-learning-with-neon/) or check out the [repo](https://github.com/tambetm/simple_dqn). 20 | 21 | ## AgentNet 22 | A library that allows you to develop custom deep/convolutional/recurrent reinforcement learning agent with full integration with Theano/Lasagne. Also contains a toolkit for various reinforcement learning algorithms, policies, memory augmentations, etc. 23 | 24 | - The repo's here: [AgentNet](https://github.com/yandexdataschool/AgentNet) 25 | - [A step-by-step demo for Atari SpaceInvaders ](https://github.com/yandexdataschool/AgentNet/blob/master/examples/Playing%20Atari%20with%20Deep%20Reinforcement%20Learning%20%28OpenAI%20Gym%29.ipynb) 26 | 27 | ## rllab 28 | 29 | a framework for developing and evaluating reinforcement learning algorithms, fully compatible with OpenAI Gym. It includes a wide range of continuous control tasks plus implementations of many algorithms. [/rllab/rllab](https://github.com/rllab/rllab) 30 | 31 | ## [keras-rl](https://github.com/matthiasplappert/keras-rl) 32 | 33 | [keras-rl](https://github.com/matthiasplappert/keras-rl) implements some state-of-the art deep reinforcement learning algorithms. It was built with OpenAI Gym in mind, and also built on top of the deep learning library [Keras](https://keras.io/) and utilises similar design patterns like callbacks and user-definable metrics. 
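## The common interaction loop

Every agent listed above ultimately drives the same observe-act-step loop against a Gym environment. A minimal sketch is shown below (the environment id is just an example; substitute your agent's `act(...)` for the random sample):

```python
import gym

env = gym.make('CartPole-v1')
ob, done = env.reset(), False
while not done:
    action = env.action_space.sample()  # an agent would pick an action based on ob
    ob, reward, done, info = env.step(action)
env.close()
```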
34 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/manipulate_egg.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/reacher.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/manipulate_block.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /gym/wrappers/filter_observation.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from gym import spaces 4 | from gym import ObservationWrapper 5 | 6 | 7 | class FilterObservation(ObservationWrapper): 8 | """Filter dictionary observations by their keys. 9 | 10 | Args: 11 | env: The environment to wrap. 12 | filter_keys: List of keys to be included in the observations. 13 | 14 | Raises: 15 | ValueError: If observation keys in not instance of None or 16 | iterable. 17 | ValueError: If any of the `filter_keys` are not included in 18 | the original `env`'s observation space 19 | 20 | """ 21 | def __init__(self, env, filter_keys=None): 22 | super(FilterObservation, self).__init__(env) 23 | 24 | wrapped_observation_space = env.observation_space 25 | assert isinstance(wrapped_observation_space, spaces.Dict), ( 26 | "FilterObservationWrapper is only usable with dict observations.") 27 | 28 | observation_keys = wrapped_observation_space.spaces.keys() 29 | 30 | if filter_keys is None: 31 | filter_keys = tuple(observation_keys) 32 | 33 | missing_keys = set( 34 | key for key in filter_keys if key not in observation_keys) 35 | 36 | if missing_keys: 37 | raise ValueError( 38 | "All the filter_keys must be included in the " 39 | "original obsrevation space.\n" 40 | "Filter keys: {filter_keys}\n" 41 | "Observation keys: {observation_keys}\n" 42 | "Missing keys: {missing_keys}".format( 43 | filter_keys=filter_keys, 44 | observation_keys=observation_keys, 45 | missing_keys=missing_keys, 46 | )) 47 | 48 | self.observation_space = type(wrapped_observation_space)([ 49 | (name, copy.deepcopy(space)) 50 | for name, space in wrapped_observation_space.spaces.items() 51 | if name in filter_keys 52 | ]) 53 | 54 | self._env = env 55 | self._filter_keys = tuple(filter_keys) 56 | 57 | def observation(self, observation): 58 | filter_observation = self._filter_observation(observation) 59 | return filter_observation 60 | 61 | def _filter_observation(self, observation): 62 | observation = type(observation)([ 63 | (name, value) 64 | for name, value in observation.items() 65 | if name in self._filter_keys 66 | ]) 67 | return observation 68 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/manipulate_egg_touch_sensors.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /gym/vector/tests/test_sync_vector_env.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from gym.spaces import Box 5 | from gym.vector.tests.utils import make_env 6 | 7 | from gym.vector.sync_vector_env import SyncVectorEnv 8 | 9 | def test_create_sync_vector_env(): 10 | env_fns = [make_env('CubeCrash-v0', i) for i in range(8)] 11 | try: 12 | env = SyncVectorEnv(env_fns) 13 | finally: 14 | env.close() 15 | 16 | assert env.num_envs == 8 17 | 18 | 19 | def test_reset_sync_vector_env(): 20 | env_fns = [make_env('CubeCrash-v0', i) for i in range(8)] 21 | try: 22 | env = SyncVectorEnv(env_fns) 23 | observations = env.reset() 24 | finally: 25 | env.close() 26 | 27 | assert isinstance(env.observation_space, Box) 28 | assert isinstance(observations, np.ndarray) 29 | assert observations.dtype == env.observation_space.dtype 30 | assert observations.shape == (8,) + env.single_observation_space.shape 31 | assert observations.shape == env.observation_space.shape 32 | 33 | 34 | @pytest.mark.parametrize('use_single_action_space', [True, False]) 35 | def test_step_sync_vector_env(use_single_action_space): 36 | env_fns = [make_env('CubeCrash-v0', i) for i in range(8)] 37 | try: 38 | env = SyncVectorEnv(env_fns) 39 | observations = env.reset() 40 | if use_single_action_space: 41 | actions = [env.single_action_space.sample() for _ in range(8)] 42 | else: 43 | actions = env.action_space.sample() 44 | observations, rewards, dones, _ = env.step(actions) 45 | finally: 46 | env.close() 47 | 48 | assert isinstance(env.observation_space, Box) 49 | assert isinstance(observations, np.ndarray) 50 | assert observations.dtype == env.observation_space.dtype 51 | assert observations.shape == (8,) + env.single_observation_space.shape 52 | assert observations.shape == env.observation_space.shape 53 | 54 | assert isinstance(rewards, np.ndarray) 55 | assert isinstance(rewards[0], (float, np.floating)) 56 | assert rewards.ndim == 1 57 | assert rewards.size == 8 58 | 59 | assert isinstance(dones, np.ndarray) 60 | assert dones.dtype == np.bool_ 61 | assert dones.ndim == 1 62 | assert dones.size == 8 63 | 64 | 65 | def test_check_observations_sync_vector_env(): 66 | # CubeCrash-v0 - observation_space: Box(40, 32, 3) 67 | env_fns = [make_env('CubeCrash-v0', i) for i in range(8)] 68 | # MemorizeDigits-v0 - observation_space: Box(24, 32, 3) 69 | env_fns[1] = make_env('MemorizeDigits-v0', 1) 70 | with pytest.raises(RuntimeError): 71 | env = SyncVectorEnv(env_fns) 72 | env.close() 73 | -------------------------------------------------------------------------------- /gym/vector/utils/spaces.py: -------------------------------------------------------------------------------- 1 | 
import numpy as np 2 | from collections import OrderedDict 3 | 4 | from gym.spaces import Box, Discrete, MultiDiscrete, MultiBinary, Tuple, Dict 5 | 6 | _BaseGymSpaces = (Box, Discrete, MultiDiscrete, MultiBinary) 7 | __all__ = ['_BaseGymSpaces', 'batch_space'] 8 | 9 | def batch_space(space, n=1): 10 | """Create a (batched) space, containing multiple copies of a single space. 11 | 12 | Parameters 13 | ---------- 14 | space : `gym.spaces.Space` instance 15 | Space (e.g. the observation space) for a single environment in the 16 | vectorized environment. 17 | 18 | n : int 19 | Number of environments in the vectorized environment. 20 | 21 | Returns 22 | ------- 23 | batched_space : `gym.spaces.Space` instance 24 | Space (e.g. the observation space) for a batch of environments in the 25 | vectorized environment. 26 | 27 | Example 28 | ------- 29 | >>> from gym.spaces import Box, Dict 30 | >>> space = Dict({ 31 | ... 'position': Box(low=0, high=1, shape=(3,), dtype=np.float32), 32 | ... 'velocity': Box(low=0, high=1, shape=(2,), dtype=np.float32)}) 33 | >>> batch_space(space, n=5) 34 | Dict(position:Box(5, 3), velocity:Box(5, 2)) 35 | """ 36 | if isinstance(space, _BaseGymSpaces): 37 | return batch_space_base(space, n=n) 38 | elif isinstance(space, Tuple): 39 | return batch_space_tuple(space, n=n) 40 | elif isinstance(space, Dict): 41 | return batch_space_dict(space, n=n) 42 | else: 43 | raise NotImplementedError() 44 | 45 | def batch_space_base(space, n=1): 46 | if isinstance(space, Box): 47 | repeats = tuple([n] + [1] * space.low.ndim) 48 | low, high = np.tile(space.low, repeats), np.tile(space.high, repeats) 49 | return Box(low=low, high=high, dtype=space.dtype) 50 | 51 | elif isinstance(space, Discrete): 52 | return MultiDiscrete(np.full((n,), space.n, dtype=space.dtype)) 53 | 54 | elif isinstance(space, MultiDiscrete): 55 | repeats = tuple([n] + [1] * space.nvec.ndim) 56 | high = np.tile(space.nvec, repeats) - 1 57 | return Box(low=np.zeros_like(high), high=high, dtype=space.dtype) 58 | 59 | elif isinstance(space, MultiBinary): 60 | return Box(low=0, high=1, shape=(n,) + space.shape, dtype=space.dtype) 61 | 62 | else: 63 | raise NotImplementedError() 64 | 65 | def batch_space_tuple(space, n=1): 66 | return Tuple(tuple(batch_space(subspace, n=n) for subspace in space.spaces)) 67 | 68 | def batch_space_dict(space, n=1): 69 | return Dict(OrderedDict([(key, batch_space(subspace, n=n)) 70 | for (key, subspace) in space.spaces.items()])) 71 | -------------------------------------------------------------------------------- /examples/agents/keyboard_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | import sys, gym, time 5 | 6 | # 7 | # Test yourself as a learning agent! Pass environment name as a command-line argument, for example: 8 | # 9 | # python keyboard_agent.py SpaceInvadersNoFrameskip-v4 10 | # 11 | 12 | env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1]) 13 | 14 | if not hasattr(env.action_space, 'n'): 15 | raise Exception('Keyboard agent only supports discrete action spaces') 16 | ACTIONS = env.action_space.n 17 | SKIP_CONTROL = 0 # Use previous control decision SKIP_CONTROL times, that's how you 18 | # can test what skip is still usable. 
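# Concretely: with SKIP_CONTROL = k, the action chosen on one step is reused for the
# next k steps before the keyboard state is sampled again (a crude form of frame-skip);
# 0 means every step reads the current key state.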
19 | 20 | human_agent_action = 0 21 | human_wants_restart = False 22 | human_sets_pause = False 23 | 24 | def key_press(key, mod): 25 | global human_agent_action, human_wants_restart, human_sets_pause 26 | if key==0xff0d: human_wants_restart = True 27 | if key==32: human_sets_pause = not human_sets_pause 28 | a = int( key - ord('0') ) 29 | if a <= 0 or a >= ACTIONS: return 30 | human_agent_action = a 31 | 32 | def key_release(key, mod): 33 | global human_agent_action 34 | a = int( key - ord('0') ) 35 | if a <= 0 or a >= ACTIONS: return 36 | if human_agent_action == a: 37 | human_agent_action = 0 38 | 39 | env.render() 40 | env.unwrapped.viewer.window.on_key_press = key_press 41 | env.unwrapped.viewer.window.on_key_release = key_release 42 | 43 | def rollout(env): 44 | global human_agent_action, human_wants_restart, human_sets_pause 45 | human_wants_restart = False 46 | obser = env.reset() 47 | skip = 0 48 | total_reward = 0 49 | total_timesteps = 0 50 | while 1: 51 | if not skip: 52 | #print("taking action {}".format(human_agent_action)) 53 | a = human_agent_action 54 | total_timesteps += 1 55 | skip = SKIP_CONTROL 56 | else: 57 | skip -= 1 58 | 59 | obser, r, done, info = env.step(a) 60 | if r != 0: 61 | print("reward %0.3f" % r) 62 | total_reward += r 63 | window_still_open = env.render() 64 | if window_still_open==False: return False 65 | if done: break 66 | if human_wants_restart: break 67 | while human_sets_pause: 68 | env.render() 69 | time.sleep(0.1) 70 | time.sleep(0.1) 71 | print("timesteps %i reward %0.2f" % (total_timesteps, total_reward)) 72 | 73 | print("ACTIONS={}".format(ACTIONS)) 74 | print("Press keys 1 2 3 ... to take actions 1 2 3 ...") 75 | print("No keys pressed is taking action 0") 76 | 77 | while 1: 78 | window_still_open = rollout(env) 79 | if window_still_open==False: break 80 | 81 | -------------------------------------------------------------------------------- /gym/spaces/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym.spaces import Box 4 | from gym.spaces import Discrete 5 | from gym.spaces import MultiDiscrete 6 | from gym.spaces import MultiBinary 7 | from gym.spaces import Tuple 8 | from gym.spaces import Dict 9 | 10 | 11 | def flatdim(space): 12 | if isinstance(space, Box): 13 | return int(np.prod(space.shape)) 14 | elif isinstance(space, Discrete): 15 | return int(space.n) 16 | elif isinstance(space, Tuple): 17 | return int(sum([flatdim(s) for s in space.spaces])) 18 | elif isinstance(space, Dict): 19 | return int(sum([flatdim(s) for s in space.spaces.values()])) 20 | elif isinstance(space, MultiBinary): 21 | return int(space.n) 22 | elif isinstance(space, MultiDiscrete): 23 | return int(np.prod(space.shape)) 24 | else: 25 | raise NotImplementedError 26 | 27 | 28 | def flatten(space, x): 29 | if isinstance(space, Box): 30 | return np.asarray(x, dtype=np.float32).flatten() 31 | elif isinstance(space, Discrete): 32 | onehot = np.zeros(space.n, dtype=np.float32) 33 | onehot[x] = 1.0 34 | return onehot 35 | elif isinstance(space, Tuple): 36 | return np.concatenate([flatten(s, x_part) for x_part, s in zip(x, space.spaces)]) 37 | elif isinstance(space, Dict): 38 | return np.concatenate([flatten(s, x[key]) for key, s in space.spaces.items()]) 39 | elif isinstance(space, MultiBinary): 40 | return np.asarray(x).flatten() 41 | elif isinstance(space, MultiDiscrete): 42 | return np.asarray(x).flatten() 43 | else: 44 | raise NotImplementedError 45 | 46 | 47 | def unflatten(space, x): 48 
| if isinstance(space, Box): 49 | return np.asarray(x, dtype=np.float32).reshape(space.shape) 50 | elif isinstance(space, Discrete): 51 | return int(np.nonzero(x)[0][0]) 52 | elif isinstance(space, Tuple): 53 | dims = [flatdim(s) for s in space.spaces] 54 | list_flattened = np.split(x, np.cumsum(dims)[:-1]) 55 | list_unflattened = [unflatten(s, flattened) 56 | for flattened, s in zip(list_flattened, space.spaces)] 57 | return tuple(list_unflattened) 58 | elif isinstance(space, Dict): 59 | dims = [flatdim(s) for s in space.spaces.values()] 60 | list_flattened = np.split(x, np.cumsum(dims)[:-1]) 61 | list_unflattened = [(key, unflatten(s, flattened)) 62 | for flattened, (key, s) in zip(list_flattened, space.spaces.items())] 63 | return dict(list_unflattened) 64 | elif isinstance(space, MultiBinary): 65 | return np.asarray(x).reshape(space.shape) 66 | elif isinstance(space, MultiDiscrete): 67 | return np.asarray(x).reshape(space.shape) 68 | else: 69 | raise NotImplementedError 70 | -------------------------------------------------------------------------------- /gym/envs/mujoco/striker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | utils.EzPickle.__init__(self) 8 | self._striked = False 9 | self._min_strike_dist = np.inf 10 | self.strike_threshold = 0.1 11 | mujoco_env.MujocoEnv.__init__(self, 'striker.xml', 5) 12 | 13 | def step(self, a): 14 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm") 15 | vec_2 = self.get_body_com("object") - self.get_body_com("goal") 16 | self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(vec_2)) 17 | 18 | if np.linalg.norm(vec_1) < self.strike_threshold: 19 | self._striked = True 20 | self._strike_pos = self.get_body_com("tips_arm") 21 | 22 | if self._striked: 23 | vec_3 = self.get_body_com("object") - self._strike_pos 24 | reward_near = - np.linalg.norm(vec_3) 25 | else: 26 | reward_near = - np.linalg.norm(vec_1) 27 | 28 | reward_dist = - np.linalg.norm(self._min_strike_dist) 29 | reward_ctrl = - np.square(a).sum() 30 | reward = 3 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near 31 | 32 | self.do_simulation(a, self.frame_skip) 33 | ob = self._get_obs() 34 | done = False 35 | return ob, reward, done, dict(reward_dist=reward_dist, 36 | reward_ctrl=reward_ctrl) 37 | 38 | def viewer_setup(self): 39 | self.viewer.cam.trackbodyid = 0 40 | self.viewer.cam.distance = 4.0 41 | 42 | def reset_model(self): 43 | self._min_strike_dist = np.inf 44 | self._striked = False 45 | self._strike_pos = None 46 | 47 | qpos = self.init_qpos 48 | 49 | self.ball = np.array([0.5, -0.175]) 50 | while True: 51 | self.goal = np.concatenate([ 52 | self.np_random.uniform(low=0.15, high=0.7, size=1), 53 | self.np_random.uniform(low=0.1, high=1.0, size=1)]) 54 | if np.linalg.norm(self.ball - self.goal) > 0.17: 55 | break 56 | 57 | qpos[-9:-7] = [self.ball[1], self.ball[0]] 58 | qpos[-7:-5] = self.goal 59 | diff = self.ball - self.goal 60 | angle = -np.arctan(diff[0] / (diff[1] + 1e-8)) 61 | qpos[-1] = angle / 3.14 62 | qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1, 63 | size=self.model.nv) 64 | qvel[7:] = 0 65 | self.set_state(qpos, qvel) 66 | return self._get_obs() 67 | 68 | def _get_obs(self): 69 | return np.concatenate([ 70 | self.sim.data.qpos.flat[:7], 71 | self.sim.data.qvel.flat[:7], 72 | 
self.get_body_com("tips_arm"), 73 | self.get_body_com("object"), 74 | self.get_body_com("goal"), 75 | ]) 76 | -------------------------------------------------------------------------------- /gym/envs/robotics/README.md: -------------------------------------------------------------------------------- 1 | # Robotics environments 2 | 3 | Details and documentation on these robotics environments are available in our [blog post](https://blog.openai.com/ingredients-for-robotics-research/), the accompanying [technical report](https://arxiv.org/abs/1802.09464), and the [Gym website](https://gym.openai.com/envs/#robotics). 4 | 5 | If you use these environments, please cite the following paper: 6 | 7 | ``` 8 | @misc{1802.09464, 9 | Author = {Matthias Plappert and Marcin Andrychowicz and Alex Ray and Bob McGrew and Bowen Baker and Glenn Powell and Jonas Schneider and Josh Tobin and Maciek Chociej and Peter Welinder and Vikash Kumar and Wojciech Zaremba}, 10 | Title = {Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research}, 11 | Year = {2018}, 12 | Eprint = {arXiv:1802.09464}, 13 | } 14 | ``` 15 | 16 | ## Fetch environments 17 | 18 | 19 | [FetchReach-v0](https://gym.openai.com/envs/FetchReach-v0/): Fetch has to move its end-effector to the desired goal position. 20 | 21 | 22 | 23 | 24 | [FetchSlide-v0](https://gym.openai.com/envs/FetchSlide-v0/): Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal. 25 | 26 | 27 | 28 | 29 | [FetchPush-v0](https://gym.openai.com/envs/FetchPush-v0/): Fetch has to move a box by pushing it until it reaches a desired goal position. 30 | 31 | 32 | 33 | 34 | [FetchPickAndPlace-v0](https://gym.openai.com/envs/FetchPickAndPlace-v0/): Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table. 35 | 36 | ## Shadow Dexterous Hand environments 37 | 38 | 39 | [HandReach-v0](https://gym.openai.com/envs/HandReach-v0/): ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm. 40 | 41 | 42 | 43 | 44 | [HandManipulateBlock-v0](https://gym.openai.com/envs/HandManipulateBlock-v0/): ShadowHand has to manipulate a block until it achieves a desired goal position and rotation. 45 | 46 | 47 | 48 | 49 | [HandManipulateEgg-v0](https://gym.openai.com/envs/HandManipulateEgg-v0/): ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation. 50 | 51 | 52 | 53 | 54 | [HandManipulatePen-v0](https://gym.openai.com/envs/HandManipulatePen-v0/): ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation. 55 | -------------------------------------------------------------------------------- /gym/envs/tests/test_determinism.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from gym.envs.tests.spec_list import spec_list 5 | 6 | @pytest.mark.parametrize("spec", spec_list) 7 | def test_env(spec): 8 | # Note that this precludes running this test in multiple 9 | # threads. However, we probably already can't do multithreading 10 | # due to some environments. 
11 | env1 = spec.make() 12 | env1.seed(0) 13 | initial_observation1 = env1.reset() 14 | env1.action_space.seed(0) 15 | action_samples1 = [env1.action_space.sample() for i in range(4)] 16 | step_responses1 = [env1.step(action) for action in action_samples1] 17 | env1.close() 18 | 19 | env2 = spec.make() 20 | env2.seed(0) 21 | initial_observation2 = env2.reset() 22 | env2.action_space.seed(0) 23 | action_samples2 = [env2.action_space.sample() for i in range(4)] 24 | step_responses2 = [env2.step(action) for action in action_samples2] 25 | env2.close() 26 | 27 | for i, (action_sample1, action_sample2) in enumerate(zip(action_samples1, action_samples2)): 28 | try: 29 | assert_equals(action_sample1, action_sample2) 30 | except AssertionError: 31 | print('env1.action_space=', env1.action_space) 32 | print('env2.action_space=', env2.action_space) 33 | print('action_samples1=', action_samples1) 34 | print('action_samples2=', action_samples2) 35 | print('[{}] action_sample1: {}, action_sample2: {}'.format(i, action_sample1, action_sample2)) 36 | raise 37 | 38 | # Don't check rollout equality if it's a nondeterministic 39 | # environment. 40 | if spec.nondeterministic: 41 | return 42 | 43 | assert_equals(initial_observation1, initial_observation2) 44 | 45 | for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(zip(step_responses1, step_responses2)): 46 | assert_equals(o1, o2, '[{}] '.format(i)) 47 | assert r1 == r2, '[{}] r1: {}, r2: {}'.format(i, r1, r2) 48 | assert d1 == d2, '[{}] d1: {}, d2: {}'.format(i, d1, d2) 49 | 50 | # Go returns a Pachi game board in info, which doesn't 51 | # properly check equality. For now, we hack around this by 52 | # just skipping Go. 53 | if spec.id not in ['Go9x9-v0', 'Go19x19-v0']: 54 | assert_equals(i1, i2, '[{}] '.format(i)) 55 | 56 | def assert_equals(a, b, prefix=None): 57 | assert type(a) == type(b), "{}Differing types: {} and {}".format(prefix, a, b) 58 | if isinstance(a, dict): 59 | assert list(a.keys()) == list(b.keys()), "{}Key sets differ: {} and {}".format(prefix, a, b) 60 | 61 | for k in a.keys(): 62 | v_a = a[k] 63 | v_b = b[k] 64 | assert_equals(v_a, v_b) 65 | elif isinstance(a, np.ndarray): 66 | np.testing.assert_array_equal(a, b) 67 | elif isinstance(a, tuple): 68 | for elem_from_a, elem_from_b in zip(a, b): 69 | assert_equals(elem_from_a, elem_from_b) 70 | else: 71 | assert a == b 72 | -------------------------------------------------------------------------------- /gym/envs/mujoco/half_cheetah_v3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | 6 | DEFAULT_CAMERA_CONFIG = { 7 | 'distance': 4.0, 8 | } 9 | 10 | 11 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): 12 | def __init__(self, 13 | xml_file='half_cheetah.xml', 14 | forward_reward_weight=1.0, 15 | ctrl_cost_weight=0.1, 16 | reset_noise_scale=0.1, 17 | exclude_current_positions_from_observation=True): 18 | utils.EzPickle.__init__(**locals()) 19 | 20 | self._forward_reward_weight = forward_reward_weight 21 | 22 | self._ctrl_cost_weight = ctrl_cost_weight 23 | 24 | self._reset_noise_scale = reset_noise_scale 25 | 26 | self._exclude_current_positions_from_observation = ( 27 | exclude_current_positions_from_observation) 28 | 29 | mujoco_env.MujocoEnv.__init__(self, xml_file, 5) 30 | 31 | def control_cost(self, action): 32 | control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) 33 | return control_cost 34 | 35 | def step(self,
action): 36 | x_position_before = self.sim.data.qpos[0] 37 | self.do_simulation(action, self.frame_skip) 38 | x_position_after = self.sim.data.qpos[0] 39 | x_velocity = ((x_position_after - x_position_before) 40 | / self.dt) 41 | 42 | ctrl_cost = self.control_cost(action) 43 | 44 | forward_reward = self._forward_reward_weight * x_velocity 45 | 46 | observation = self._get_obs() 47 | reward = forward_reward - ctrl_cost 48 | done = False 49 | info = { 50 | 'x_position': x_position_after, 51 | 'x_velocity': x_velocity, 52 | 53 | 'reward_run': forward_reward, 54 | 'reward_ctrl': -ctrl_cost 55 | } 56 | 57 | return observation, reward, done, info 58 | 59 | def _get_obs(self): 60 | position = self.sim.data.qpos.flat.copy() 61 | velocity = self.sim.data.qvel.flat.copy() 62 | 63 | if self._exclude_current_positions_from_observation: 64 | position = position[1:] 65 | 66 | observation = np.concatenate((position, velocity)).ravel() 67 | return observation 68 | 69 | def reset_model(self): 70 | noise_low = -self._reset_noise_scale 71 | noise_high = self._reset_noise_scale 72 | 73 | qpos = self.init_qpos + self.np_random.uniform( 74 | low=noise_low, high=noise_high, size=self.model.nq) 75 | qvel = self.init_qvel + self._reset_noise_scale * self.np_random.randn( 76 | self.model.nv) 77 | 78 | self.set_state(qpos, qvel) 79 | 80 | observation = self._get_obs() 81 | return observation 82 | 83 | def viewer_setup(self): 84 | for key, value in DEFAULT_CAMERA_CONFIG.items(): 85 | if isinstance(value, np.ndarray): 86 | getattr(self.viewer.cam, key)[:] = value 87 | else: 88 | setattr(self.viewer.cam, key, value) 89 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/hopper.xml: -------------------------------------------------------------------------------- 1 | <!-- MuJoCo XML model for the Hopper environment; body not captured in this dump --> -------------------------------------------------------------------------------- /gym/envs/tests/test_mujoco_v2_to_v3_conversion.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from gym import envs 4 | from gym.envs.tests.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE 5 | 6 | 7 | def verify_environments_match(old_environment_id, 8 | new_environment_id, 9 | seed=1, 10 | num_actions=1000): 11 | old_environment = envs.make(old_environment_id) 12 | new_environment = envs.make(new_environment_id) 13 | 14 | old_environment.seed(seed) 15 | new_environment.seed(seed) 16 | 17 | old_reset_observation = old_environment.reset() 18 | new_reset_observation = new_environment.reset() 19 | 20 | np.testing.assert_allclose(old_reset_observation, new_reset_observation) 21 | 22 | for i in range(num_actions): 23 | action = old_environment.action_space.sample() 24 | old_observation, old_reward, old_done, old_info = old_environment.step( 25 | action) 26 | new_observation, new_reward, new_done, new_info = new_environment.step( 27 | action) 28 | 29 | eps = 1e-6 30 | np.testing.assert_allclose(old_observation, new_observation, atol=eps) 31 | np.testing.assert_allclose(old_reward, new_reward, atol=eps) 32 | np.testing.assert_allclose(old_done, new_done, atol=eps) 33 | 34 | for key in old_info: 35 | np.testing.assert_allclose(old_info[key], new_info[key], atol=eps) 36 | 37 | 38 | @unittest.skipIf(skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE) 39 | class Mujocov2Tov3ConversionTest(unittest.TestCase): 40 | def test_environments_match(self): 41 | test_cases = ( 42 | { 43 | 'old_id': 'Swimmer-v2', 44 | 'new_id':
'Swimmer-v3' 45 | }, 46 | { 47 | 'old_id': 'Hopper-v2', 48 | 'new_id': 'Hopper-v3' 49 | }, 50 | { 51 | 'old_id': 'Walker2d-v2', 52 | 'new_id': 'Walker2d-v3' 53 | }, 54 | { 55 | 'old_id': 'HalfCheetah-v2', 56 | 'new_id': 'HalfCheetah-v3' 57 | }, 58 | { 59 | 'old_id': 'Ant-v2', 60 | 'new_id': 'Ant-v3' 61 | }, 62 | { 63 | 'old_id': 'Humanoid-v2', 64 | 'new_id': 'Humanoid-v3' 65 | }, 66 | ) 67 | 68 | for test_case in test_cases: 69 | verify_environments_match(test_case['old_id'], test_case['new_id']) 70 | 71 | # Raises KeyError because the new envs have extra info 72 | with self.assertRaises(KeyError): 73 | verify_environments_match('Swimmer-v3', 'Swimmer-v2') 74 | 75 | # Raises KeyError because the new envs have extra info 76 | with self.assertRaises(KeyError): 77 | verify_environments_match('Humanoid-v3', 'Humanoid-v2') 78 | 79 | # Raises KeyError because the new envs have extra info 80 | with self.assertRaises(KeyError): 81 | verify_environments_match('Swimmer-v3', 'Swimmer-v2') 82 | 83 | 84 | if __name__ == '__main__': 85 | unittest.main() 86 | -------------------------------------------------------------------------------- /gym/wrappers/test_filter_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | import gym 5 | from gym import spaces 6 | from gym.wrappers.filter_observation import FilterObservation 7 | 8 | 9 | class FakeEnvironment(gym.Env): 10 | def __init__(self, observation_keys=('state',)): 11 | self.observation_space = spaces.Dict({ 12 | name: spaces.Box(shape=(2, ), low=-1, high=1, dtype=np.float32) 13 | for name in observation_keys 14 | }) 15 | self.action_space = spaces.Box( 16 | shape=(1, ), low=-1, high=1, dtype=np.float32) 17 | 18 | def render(self, width=32, height=32, *args, **kwargs): 19 | del args 20 | del kwargs 21 | image_shape = (height, width, 3) 22 | return np.zeros(image_shape, dtype=np.uint8) 23 | 24 | def reset(self): 25 | observation = self.observation_space.sample() 26 | return observation 27 | 28 | def step(self, action): 29 | del action 30 | observation = self.observation_space.sample() 31 | reward, terminal, info = 0.0, False, {} 32 | return observation, reward, terminal, info 33 | 34 | 35 | FILTER_OBSERVATION_TEST_CASES = ( 36 | (('key1', 'key2'), ('key1', )), 37 | (('key1', 'key2'), ('key1', 'key2')), 38 | (('key1', ), None), 39 | (('key1', ), ('key1', )), 40 | ) 41 | 42 | ERROR_TEST_CASES = ( 43 | ('key', ValueError, "All the filter_keys must be included..*"), 44 | (False, TypeError, "'bool' object is not iterable"), 45 | (1, TypeError, "'int' object is not iterable"), 46 | ) 47 | 48 | 49 | class TestFilterObservation(object): 50 | @pytest.mark.parametrize("observation_keys,filter_keys", 51 | FILTER_OBSERVATION_TEST_CASES) 52 | def test_filter_observation(self, observation_keys, filter_keys): 53 | env = FakeEnvironment(observation_keys=observation_keys) 54 | 55 | # Make sure we are testing the right environment for the test.
56 | observation_space = env.observation_space 57 | assert isinstance(observation_space, spaces.Dict) 58 | 59 | wrapped_env = FilterObservation(env, filter_keys=filter_keys) 60 | 61 | assert isinstance(wrapped_env.observation_space, spaces.Dict) 62 | 63 | if filter_keys is None: 64 | filter_keys = tuple(observation_keys) 65 | 66 | assert len(wrapped_env.observation_space.spaces) == len(filter_keys) 67 | assert (tuple(wrapped_env.observation_space.spaces.keys()) 68 | == tuple(filter_keys)) 69 | 70 | # Check that the added space item is consistent with the added observation. 71 | observation = wrapped_env.reset() 72 | assert (len(observation) == len(filter_keys)) 73 | 74 | @pytest.mark.parametrize("filter_keys,error_type,error_match", 75 | ERROR_TEST_CASES) 76 | def test_raises_with_incorrect_arguments(self, 77 | filter_keys, 78 | error_type, 79 | error_match): 80 | env = FakeEnvironment(observation_keys=('key1', 'key2')) 81 | 82 | 83 | 84 | with pytest.raises(error_type, match=error_match): 85 | FilterObservation(env, filter_keys=filter_keys) 86 | --------------------------------------------------------------------------------
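
A short round-trip makes the helpers in gym/spaces/utils.py above concrete: `flatten` turns a structured sample into a flat float vector (one-hot encoding `Discrete` entries), and `unflatten` reverses it. The sketch below is illustrative only; the `Dict` layout and the key names `'position'` and `'mode'` are invented for the example, while `flatdim`, `flatten` and `unflatten` are the functions defined in that file.

```python
# Round-trip sketch for gym/spaces/utils.py (illustrative space layout).
import numpy as np
from gym import spaces
from gym.spaces.utils import flatdim, flatten, unflatten

# A composite space: a 2-D Box plus a 3-way Discrete choice.
space = spaces.Dict({
    'position': spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32),
    'mode': spaces.Discrete(3),
})

sample = space.sample()
flat = flatten(space, sample)

# The Box contributes its 2 entries; the Discrete is one-hot encoded into 3.
assert flat.shape == (flatdim(space),) == (5,)

restored = unflatten(space, flat)
assert restored['mode'] == sample['mode']
np.testing.assert_allclose(restored['position'], sample['position'])
```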
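
The robotics README above describes the Fetch and Shadow Hand tasks but not how an agent interacts with them. A hedged usage sketch follows: it assumes a working mujoco-py installation and uses the `'FetchReach-v1'` id (the README links use the older `-v0` names, so the registered version may differ in your install).

```python
# Hedged sketch of driving a goal-based Fetch environment; the id
# 'FetchReach-v1' is an assumption and may need adjusting per release.
import gym
import numpy as np

env = gym.make('FetchReach-v1')
obs = env.reset()

# Goal-based environments return a dict observation.
print(sorted(obs.keys()))  # ['achieved_goal', 'desired_goal', 'observation']

for _ in range(5):
    obs, reward, done, info = env.step(env.action_space.sample())
    # The reward is a function of the achieved and desired goal, which is
    # what allows goal relabelling (e.g. hindsight experience replay).
    recomputed = env.compute_reward(obs['achieved_goal'], obs['desired_goal'], info)
    assert np.isclose(reward, recomputed)

env.close()
```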
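
`FilterObservation`, exercised by the test above against a `FakeEnvironment`, applies to any environment whose observation space is a `Dict`. A small sketch under the same assumptions as the previous example (mujoco-py installed, `'FetchReach-v1'` registered):

```python
# Keep only the raw observation and the desired goal, dropping achieved_goal.
import gym
from gym.wrappers.filter_observation import FilterObservation

env = FilterObservation(gym.make('FetchReach-v1'),
                        filter_keys=('observation', 'desired_goal'))
obs = env.reset()
assert sorted(obs.keys()) == ['desired_goal', 'observation']
```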