├── .dockerignore ├── gym ├── envs │ ├── tests │ │ ├── __init__.py │ │ ├── spec_list.py │ │ ├── test_registration.py │ │ ├── test_envs.py │ │ ├── test_determinism.py │ │ └── test_envs_semantics.py │ ├── algorithmic │ │ ├── tests │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── copy_.py │ │ ├── reverse.py │ │ ├── repeat_copy.py │ │ ├── duplicated_input.py │ │ └── reversed_addition.py │ ├── robotics │ │ ├── assets │ │ │ ├── stls │ │ │ │ ├── .get │ │ │ │ ├── hand │ │ │ │ │ ├── F1.stl │ │ │ │ │ ├── F2.stl │ │ │ │ │ ├── F3.stl │ │ │ │ │ ├── palm.stl │ │ │ │ │ ├── TH1_z.stl │ │ │ │ │ ├── TH2_z.stl │ │ │ │ │ ├── TH3_z.stl │ │ │ │ │ ├── knuckle.stl │ │ │ │ │ ├── wrist.stl │ │ │ │ │ ├── lfmetacarpal.stl │ │ │ │ │ ├── forearm_electric.stl │ │ │ │ │ └── forearm_electric_cvx.stl │ │ │ │ └── fetch │ │ │ │ │ ├── estop_link.stl │ │ │ │ │ ├── laser_link.stl │ │ │ │ │ ├── gripper_link.stl │ │ │ │ │ ├── torso_fixed_link.stl │ │ │ │ │ ├── base_link_collision.stl │ │ │ │ │ ├── bellows_link_collision.stl │ │ │ │ │ ├── head_pan_link_collision.stl │ │ │ │ │ ├── l_wheel_link_collision.stl │ │ │ │ │ ├── r_wheel_link_collision.stl │ │ │ │ │ ├── elbow_flex_link_collision.stl │ │ │ │ │ ├── head_tilt_link_collision.stl │ │ │ │ │ ├── torso_lift_link_collision.stl │ │ │ │ │ ├── wrist_flex_link_collision.stl │ │ │ │ │ ├── wrist_roll_link_collision.stl │ │ │ │ │ ├── forearm_roll_link_collision.stl │ │ │ │ │ ├── shoulder_lift_link_collision.stl │ │ │ │ │ ├── shoulder_pan_link_collision.stl │ │ │ │ │ └── upperarm_roll_link_collision.stl │ │ │ ├── textures │ │ │ │ ├── block.png │ │ │ │ └── block_hidden.png │ │ │ ├── fetch │ │ │ │ ├── reach.xml │ │ │ │ ├── push.xml │ │ │ │ ├── slide.xml │ │ │ │ ├── pick_and_place.xml │ │ │ │ └── shared.xml │ │ │ └── hand │ │ │ │ ├── reach.xml │ │ │ │ ├── shared_asset.xml │ │ │ │ ├── manipulate_pen.xml │ │ │ │ ├── manipulate_egg.xml │ │ │ │ └── manipulate_block.xml │ │ ├── fetch │ │ │ ├── __init__.py │ │ │ ├── reach.py │ │ │ ├── push.py │ │ │ ├── pick_and_place.py │ │ │ └── slide.py │ │ ├── hand │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── hand_env.py │ │ ├── README.md │ │ └── utils.py │ ├── atari │ │ └── __init__.py │ ├── classic_control │ │ ├── assets │ │ │ └── clockwise.png │ │ ├── __init__.py │ │ └── pendulum.py │ ├── box2d │ │ ├── __init__.py │ │ └── test_lunar_lander.py │ ├── unittest │ │ └── __init__.py │ ├── toy_text │ │ ├── __init__.py │ │ ├── roulette.py │ │ ├── discrete.py │ │ ├── nchain.py │ │ ├── hotter_colder.py │ │ ├── guessing_game.py │ │ ├── cliffwalking.py │ │ └── blackjack.py │ ├── mujoco │ │ ├── __init__.py │ │ ├── inverted_pendulum.py │ │ ├── swimmer.py │ │ ├── half_cheetah.py │ │ ├── assets │ │ │ ├── inverted_pendulum.xml │ │ │ ├── point.xml │ │ │ ├── inverted_double_pendulum.xml │ │ │ ├── swimmer.xml │ │ │ ├── reacher.xml │ │ │ ├── hopper.xml │ │ │ └── walker2d.xml │ │ ├── walker2d.py │ │ ├── hopper.py │ │ ├── inverted_double_pendulum.py │ │ ├── reacher.py │ │ ├── ant.py │ │ ├── humanoidstandup.py │ │ ├── pusher.py │ │ ├── humanoid.py │ │ ├── thrower.py │ │ └── striker.py │ └── README.md ├── spaces │ ├── tests │ │ ├── __init__.py │ │ └── test_spaces.py │ ├── __init__.py │ ├── multi_binary.py │ ├── prng.py │ ├── discrete.py │ ├── multi_discrete.py │ ├── tuple_space.py │ ├── box.py │ └── dict_space.py ├── wrappers │ ├── tests │ │ └── __init__.py │ ├── monitoring │ │ ├── __init__.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── helpers.py │ │ │ └── test_video_recorder.py │ │ └── stats_recorder.py │ ├── __init__.py │ ├── dict.py │ ├── README.md │ └── time_limit.py ├── 
version.py ├── utils │ ├── reraise_impl_py2.py │ ├── reraise_impl_py3.py │ ├── __init__.py │ ├── tests │ │ ├── test_seeding.py │ │ └── test_atexit.py │ ├── json_utils.py │ ├── colorize.py │ ├── ezpickle.py │ ├── reraise.py │ ├── atomic_write.py │ ├── closer.py │ └── seeding.py ├── tests │ └── test_core.py ├── __init__.py ├── logger.py └── error.py ├── requirements_dev.txt ├── requirements.txt ├── examples ├── scripts │ ├── list_envs │ ├── sim_env │ └── benchmark_runner └── agents │ ├── _policies.py │ ├── random_agent.py │ ├── keyboard_agent.py │ └── cem.py ├── unittest.cfg ├── docs ├── misc.md ├── readme.md ├── environments.md └── agents.md ├── tox.ini ├── Makefile ├── .gitignore ├── bin ├── docker_entrypoint └── render.py ├── .travis.yml ├── CODE_OF_CONDUCT.rst ├── test.dockerfile.14.04 ├── test.dockerfile.18.04 ├── LICENSE.md ├── test.dockerfile.16.04 ├── setup.py └── scripts └── generate_json.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .tox 2 | -------------------------------------------------------------------------------- /gym/envs/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/spaces/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/wrappers/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/.get: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/envs/robotics/hand/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/version.py: -------------------------------------------------------------------------------- 1 | VERSION = '0.10.9' 2 | -------------------------------------------------------------------------------- /gym/wrappers/monitoring/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gym/wrappers/monitoring/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | # Testing 2 | pytest 3 | mock 4 | 5 | -e .[all] 6 | -------------------------------------------------------------------------------- /gym/envs/atari/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.atari.atari_env import AtariEnv 2 | 
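# Usage sketch (an assumption, not taken from this file): Atari environments are normally created
# through the registry rather than by instantiating AtariEnv directly, e.g. env = gym.make('Pong-v0'),
# which assumes the optional atari-py dependency is installed.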
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.10.4 2 | requests>=2.0 3 | six 4 | pyglet>=1.2.0 5 | scipy==0.17.1 6 | -------------------------------------------------------------------------------- /gym/utils/reraise_impl_py2.py: -------------------------------------------------------------------------------- 1 | def reraise_impl(e, traceback): 2 | raise e.__class__, e, traceback 3 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/F1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/F1.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/F2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/F2.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/F3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/F3.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/palm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/palm.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/textures/block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/textures/block.png -------------------------------------------------------------------------------- /gym/envs/classic_control/assets/clockwise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/classic_control/assets/clockwise.png -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/TH1_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/TH1_z.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/TH2_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/TH2_z.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/TH3_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/TH3_z.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/knuckle.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/knuckle.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/wrist.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/wrist.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/estop_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/estop_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/laser_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/laser_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/textures/block_hidden.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/textures/block_hidden.png -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/gripper_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/gripper_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/forearm_electric.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/forearm_electric.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/base_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl -------------------------------------------------------------------------------- /examples/scripts/list_envs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from gym import envs 3 | envids = [spec.id for spec in envs.registry.all()] 4 | for envid in sorted(envids): 5 | print(envid) 6 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl -------------------------------------------------------------------------------- /gym/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from gym import error 2 | from gym.wrappers.monitor import Monitor 3 | from gym.wrappers.time_limit import TimeLimit 4 | from gym.wrappers.dict import FlattenDictWrapper 5 | -------------------------------------------------------------------------------- /unittest.cfg: -------------------------------------------------------------------------------- 1 | [log-capture] 2 | always-on = True 3 | clear-handlers = True 4 | date-format = None 5 | filter = -nose 6 | log-level = NOTSET 7 | 8 | [output-buffer] 9 | always-on = True 10 | stderr = True 11 | stdout = True 12 | -------------------------------------------------------------------------------- /gym/wrappers/monitoring/tests/helpers.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import shutil 3 | import tempfile 4 | 5 | @contextlib.contextmanager 6 | def tempdir(): 7 | temp = tempfile.mkdtemp() 8 | yield temp 9 | shutil.rmtree(temp) 10 | -------------------------------------------------------------------------------- /gym/utils/reraise_impl_py3.py: -------------------------------------------------------------------------------- 1 | # http://stackoverflow.com/a/33822606 -- `from None` disables Python 3' 2 | # semi-smart exception chaining, which we don't want in this case. 
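# Example call (illustrative only): reraise_impl(err, sys.exc_info()[2]) re-raises `err` with the
# supplied traceback while keeping the implicit exception context suppressed.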
3 | def reraise_impl(e, traceback): 4 | raise e.with_traceback(traceback) from None 5 | -------------------------------------------------------------------------------- /gym/envs/box2d/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.box2d.lunar_lander import LunarLander 2 | from gym.envs.box2d.lunar_lander import LunarLanderContinuous 3 | from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore 4 | from gym.envs.box2d.car_racing import CarRacing 5 | -------------------------------------------------------------------------------- /gym/envs/unittest/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.unittest.cube_crash import CubeCrash 2 | from gym.envs.unittest.cube_crash import CubeCrashSparse 3 | from gym.envs.unittest.cube_crash import CubeCrashScreenBecomesBlack 4 | from gym.envs.unittest.memorize_digits import MemorizeDigits 5 | 6 | -------------------------------------------------------------------------------- /docs/misc.md: -------------------------------------------------------------------------------- 1 | # Miscellaneous 2 | 3 | Here we have a bunch of tools, libs, apis, tutorials, resources, etc. provided by the community to add value to the gym ecosystem. 4 | 5 | ## OpenAIGym.jl 6 | 7 | Convenience wrapper of the OpenAI Gym for the Julia language [/tbreloff/OpenAIGym.jl](https://github.com/tbreloff/OpenAIGym.jl) -------------------------------------------------------------------------------- /gym/envs/algorithmic/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.algorithmic.copy_ import CopyEnv 2 | from gym.envs.algorithmic.repeat_copy import RepeatCopyEnv 3 | from gym.envs.algorithmic.duplicated_input import DuplicatedInputEnv 4 | from gym.envs.algorithmic.reverse import ReverseEnv 5 | from gym.envs.algorithmic.reversed_addition import ReversedAdditionEnv 6 | -------------------------------------------------------------------------------- /gym/envs/classic_control/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.classic_control.cartpole import CartPoleEnv 2 | from gym.envs.classic_control.mountain_car import MountainCarEnv 3 | from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv 4 | from gym.envs.classic_control.pendulum import PendulumEnv 5 | from gym.envs.classic_control.acrobot import AcrobotEnv 6 | 7 | -------------------------------------------------------------------------------- /gym/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.spaces.box import Box 2 | from gym.spaces.discrete import Discrete 3 | from gym.spaces.multi_discrete import MultiDiscrete 4 | from gym.spaces.multi_binary import MultiBinary 5 | from gym.spaces.prng import seed, np_random 6 | from gym.spaces.tuple_space import Tuple 7 | from gym.spaces.dict_space import Dict 8 | 9 | __all__ = ["Box", "Discrete", "MultiDiscrete", "MultiBinary", "Tuple", "Dict"] 10 | -------------------------------------------------------------------------------- /gym/tests/test_core.py: -------------------------------------------------------------------------------- 1 | from gym import core 2 | 3 | class ArgumentEnv(core.Env): 4 | calls = 0 5 | 6 | def __init__(self, arg): 7 | self.calls += 1 8 | self.arg = arg 9 | 10 | def test_env_instantiation(): 11 | # This 
looks like a pretty trivial test, but given our usage of 12 | # __new__, it's worth having. 13 | env = ArgumentEnv('arg') 14 | assert env.arg == 'arg' 15 | assert env.calls == 1 16 | -------------------------------------------------------------------------------- /gym/__init__.py: -------------------------------------------------------------------------------- 1 | import distutils.version 2 | import os 3 | import sys 4 | import warnings 5 | 6 | from gym import error 7 | from gym.utils import reraise 8 | from gym.version import VERSION as __version__ 9 | 10 | from gym.core import Env, GoalEnv, Space, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper 11 | from gym.envs import make, spec 12 | from gym import logger 13 | 14 | __all__ = ["Env", "Space", "Wrapper", "make", "spec"] 15 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/copy_.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to copy content from the input tape to 3 | the output tape. http://arxiv.org/abs/1511.07275 4 | """ 5 | from gym.envs.algorithmic import algorithmic_env 6 | 7 | class CopyEnv(algorithmic_env.TapeAlgorithmicEnv): 8 | def __init__(self, base=5, chars=True): 9 | super(CopyEnv, self).__init__(base=base, chars=chars) 10 | 11 | def target_from_input_data(self, input_data): 12 | return input_data 13 | 14 | -------------------------------------------------------------------------------- /gym/envs/box2d/test_lunar_lander.py: -------------------------------------------------------------------------------- 1 | from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander 2 | 3 | def test_lunar_lander(): 4 | _test_lander(LunarLander(), seed=0) 5 | 6 | def test_lunar_lander_continuous(): 7 | _test_lander(LunarLanderContinuous(), seed=0) 8 | 9 | def _test_lander(env, seed=None, render=False): 10 | total_reward = demo_heuristic_lander(env, seed=seed, render=render) 11 | assert total_reward > 100 12 | 13 | 14 | -------------------------------------------------------------------------------- /gym/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """A set of common utilities used within the environments. These are 2 | not intended as API functions, and will not remain stable over time. 3 | """ 4 | 5 | # These submodules should not have any import-time dependencies. 6 | # We want this since we use `utils` during our import-time sanity checks 7 | # that verify that our dependencies are actually present. 8 | from .colorize import colorize 9 | from .ezpickle import EzPickle 10 | from .reraise import reraise 11 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Tox (http://tox.testrun.org/) is a tool for running tests 2 | # in multiple virtualenvs. This configuration file will run the 3 | # test suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory.
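# For example (illustrative): `tox -e py3 -- -k pendulum` runs the py3 environment and forwards
# everything after `--` to pytest through the {posargs} placeholder below.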
5 | 6 | [tox] 7 | envlist = py3 8 | 9 | [testenv:py3] 10 | whitelist_externals=make 11 | passenv=DISPLAY TRAVIS* 12 | deps = 13 | pytest 14 | mock 15 | -e .[all] 16 | commands = 17 | pytest {posargs} 18 | 19 | -------------------------------------------------------------------------------- /gym/utils/tests/test_seeding.py: -------------------------------------------------------------------------------- 1 | from gym import error 2 | from gym.utils import seeding 3 | 4 | def test_invalid_seeds(): 5 | for seed in [-1, 'test']: 6 | try: 7 | seeding.np_random(seed) 8 | except error.Error: 9 | pass 10 | else: 11 | assert False, 'Invalid seed {} passed validation'.format(seed) 12 | 13 | def test_valid_seeds(): 14 | for seed in [0, 1]: 15 | random, seed1 = seeding.np_random(seed) 16 | assert seed == seed1 17 | -------------------------------------------------------------------------------- /docs/readme.md: -------------------------------------------------------------------------------- 1 | # Table of Contents 2 | 3 | - [Agents](agents.md) contains a listing of agents compatible with gym environments. Agents facilitate the running of an algorithm against an environment. 4 | 5 | - [Environments](environments.md) lists more environments to run your algorithms against. These do not come prepackaged with the gym. 6 | 7 | - [Miscellaneous](misc.md) is a collection of other value-add tools and utilities. These could be anything from a small convenience lib to a collection of video tutorials or a new language binding. 8 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/reverse.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to reverse content over the input tape. 3 | http://arxiv.org/abs/1511.07275 4 | """ 5 | 6 | from gym.envs.algorithmic import algorithmic_env 7 | 8 | class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1 10 | def __init__(self, base=2): 11 | super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1) 12 | self.last = 50 13 | 14 | def target_from_input_data(self, input_str): 15 | return list(reversed(input_str)) 16 | -------------------------------------------------------------------------------- /gym/envs/toy_text/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.toy_text.blackjack import BlackjackEnv 2 | from gym.envs.toy_text.roulette import RouletteEnv 3 | from gym.envs.toy_text.frozen_lake import FrozenLakeEnv 4 | from gym.envs.toy_text.nchain import NChainEnv 5 | from gym.envs.toy_text.hotter_colder import HotterColder 6 | from gym.envs.toy_text.guessing_game import GuessingGame 7 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv 8 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipGeneralizedEnv 9 | from gym.envs.toy_text.cliffwalking import CliffWalkingEnv 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install test 2 | 3 | install: 4 | pip install -r requirements.txt 5 | 6 | base: 7 | docker pull ubuntu:14.04 8 | docker tag ubuntu:14.04 quay.io/openai/gym:base 9 | docker push quay.io/openai/gym:base 10 | 11 | test: 12 | docker build -f test.dockerfile -t quay.io/openai/gym:test . 
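# Note: this tree only ships versioned dockerfiles (test.dockerfile.14.04/16.04/18.04), so in
# practice the build is invoked as in .travis.yml, e.g. `docker build -f test.dockerfile.16.04 ...`.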
13 | docker push quay.io/openai/gym:test 14 | 15 | upload: 16 | rm -rf dist 17 | python setup.py sdist 18 | twine upload dist/* 19 | 20 | docker-build: 21 | docker build -t quay.io/openai/gym . 22 | 23 | docker-run: 24 | docker run -ti quay.io/openai/gym bash 25 | -------------------------------------------------------------------------------- /gym/envs/robotics/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.robotics.fetch_env import FetchEnv 2 | from gym.envs.robotics.fetch.slide import FetchSlideEnv 3 | from gym.envs.robotics.fetch.pick_and_place import FetchPickAndPlaceEnv 4 | from gym.envs.robotics.fetch.push import FetchPushEnv 5 | from gym.envs.robotics.fetch.reach import FetchReachEnv 6 | 7 | from gym.envs.robotics.hand.reach import HandReachEnv 8 | from gym.envs.robotics.hand.manipulate import HandBlockEnv 9 | from gym.envs.robotics.hand.manipulate import HandEggEnv 10 | from gym.envs.robotics.hand.manipulate import HandPenEnv 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | *.py~ 4 | .DS_Store 5 | .cache 6 | .pytest_cache/ 7 | 8 | # Setuptools distribution and build folders. 9 | /dist/ 10 | /build 11 | 12 | # Virtualenv 13 | /env 14 | 15 | # Python egg metadata, regenerated from source files by setuptools. 16 | /*.egg-info 17 | 18 | *.sublime-project 19 | *.sublime-workspace 20 | 21 | logs/ 22 | 23 | .ipynb_checkpoints 24 | ghostdriver.log 25 | 26 | junk 27 | MUJOCO_LOG.txt 28 | 29 | rllab_mujoco 30 | 31 | tutorial/*.html 32 | 33 | # IDE files 34 | .eggs 35 | .tox 36 | 37 | # PyCharm project files 38 | .idea 39 | vizdoom.ini 40 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/repeat_copy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to copy content multiple times from the input tape to 3 | the output tape. http://arxiv.org/abs/1511.07275 4 | """ 5 | from gym.envs.algorithmic import algorithmic_env 6 | 7 | class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv): 8 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1 9 | def __init__(self, base=5): 10 | super(RepeatCopyEnv, self).__init__(base=base, chars=True) 11 | self.last = 50 12 | 13 | def target_from_input_data(self, input_data): 14 | return input_data + list(reversed(input_data)) + input_data 15 | 16 | -------------------------------------------------------------------------------- /gym/utils/tests/test_atexit.py: -------------------------------------------------------------------------------- 1 | from gym.utils.closer import Closer 2 | 3 | class Closeable(object): 4 | close_called = False 5 | def close(self): 6 | self.close_called = True 7 | 8 | def test_register_unregister(): 9 | registry = Closer(atexit_register=False) 10 | c1 = Closeable() 11 | c2 = Closeable() 12 | 13 | assert not c1.close_called 14 | assert not c2.close_called 15 | registry.register(c1) 16 | id2 = registry.register(c2) 17 | 18 | registry.unregister(id2) 19 | registry.close() 20 | assert c1.close_called 21 | assert not c2.close_called 22 | -------------------------------------------------------------------------------- /bin/docker_entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is the entrypoint for our Docker image. 
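# In outline: start a virtual X display with Xvfb so environments can render headlessly, wait
# for the X socket to appear, then exec whatever command was passed to the container.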
3 | 4 | set -ex 5 | 6 | # Set up display; otherwise rendering will fail 7 | Xvfb -screen 0 1024x768x24 & 8 | export DISPLAY=:0 9 | 10 | # Wait for the file to come up 11 | display=0 12 | file="/tmp/.X11-unix/X$display" 13 | for i in $(seq 1 10); do 14 | if [ -e "$file" ]; then 15 | break 16 | fi 17 | 18 | echo "Waiting for $file to be created (try $i/10)" 19 | sleep "$i" 20 | done 21 | if ! [ -e "$file" ]; then 22 | echo "Timing out: $file was not created" 23 | exit 1 24 | fi 25 | 26 | exec "$@" 27 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | language: python 3 | services: 4 | - docker 5 | env: 6 | # - UBUNTU_VER=14.04 - problems with atari-py 7 | - UBUNTU_VER=16.04 8 | - UBUNTU_VER=18.04 9 | 10 | install: "" # so travis doesn't do pip install requirements.txt 11 | script: 12 | - docker build -f test.dockerfile.${UBUNTU_VER} -t gym-test --build-arg MUJOCO_KEY=$MUJOCO_KEY . 13 | - docker run -e MUJOCO_KEY=$MUJOCO_KEY gym-test tox 14 | 15 | deploy: 16 | provider: pypi 17 | username: $TWINE_USERNAME 18 | password: $TWINE_PASSWORD 19 | on: 20 | tags: true 21 | condition: $UBUNTU_VER = 16.04 22 | -------------------------------------------------------------------------------- /bin/render.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import gym 4 | 5 | 6 | parser = argparse.ArgumentParser(description='Renders a Gym environment for quick inspection.') 7 | parser.add_argument('env_id', type=str, help='the ID of the environment to be rendered (e.g. HalfCheetah-v1') 8 | parser.add_argument('--step', type=int, default=1) 9 | args = parser.parse_args() 10 | 11 | env = gym.make(args.env_id) 12 | env.reset() 13 | 14 | step = 0 15 | while True: 16 | if args.step: 17 | env.step(env.action_space.sample()) 18 | env.render() 19 | if step % 10 == 0: 20 | env.reset() 21 | step += 1 22 | -------------------------------------------------------------------------------- /examples/agents/_policies.py: -------------------------------------------------------------------------------- 1 | # Support code for cem.py 2 | 3 | class BinaryActionLinearPolicy(object): 4 | def __init__(self, theta): 5 | self.w = theta[:-1] 6 | self.b = theta[-1] 7 | def act(self, ob): 8 | y = ob.dot(self.w) + self.b 9 | a = int(y < 0) 10 | return a 11 | 12 | class ContinuousActionLinearPolicy(object): 13 | def __init__(self, theta, n_in, n_out): 14 | assert len(theta) == (n_in + 1) * n_out 15 | self.W = theta[0 : n_in * n_out].reshape(n_in, n_out) 16 | self.b = theta[n_in * n_out : None].reshape(1, n_out) 17 | def act(self, ob): 18 | a = ob.dot(self.W) + self.b 19 | return a 20 | -------------------------------------------------------------------------------- /gym/utils/json_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def json_encode_np(obj): 4 | """ 5 | JSON can't serialize numpy types, so convert to pure python 6 | """ 7 | if isinstance(obj, np.ndarray): 8 | return list(obj) 9 | elif isinstance(obj, np.float32): 10 | return float(obj) 11 | elif isinstance(obj, np.float64): 12 | return float(obj) 13 | elif isinstance(obj, np.int8): 14 | return int(obj) 15 | elif isinstance(obj, np.int16): 16 | return int(obj) 17 | elif isinstance(obj, np.int32): 18 | return int(obj) 19 | elif isinstance(obj, np.int64): 20 | return int(obj) 
21 | else: 22 | return obj 23 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | OpenAI Gym is dedicated to providing a harassment-free experience for 2 | everyone, regardless of gender, gender identity and expression, sexual 3 | orientation, disability, physical appearance, body size, age, race, or 4 | religion. We do not tolerate harassment of participants in any form. 5 | 6 | This code of conduct applies to all OpenAI Gym spaces (including Gist 7 | comments) both online and off. Anyone who violates this code of 8 | conduct may be sanctioned or expelled from these spaces at the 9 | discretion of the OpenAI team. 10 | 11 | We may add additional rules over time, which will be made clearly 12 | available to participants. Participants are responsible for knowing 13 | and abiding by these rules. 14 | -------------------------------------------------------------------------------- /gym/spaces/multi_binary.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | class MultiBinary(gym.Space): 5 | def __init__(self, n): 6 | self.n = n 7 | gym.Space.__init__(self, (self.n,), np.int8) 8 | 9 | def sample(self): 10 | return gym.spaces.np_random.randint(low=0, high=2, size=self.n).astype(self.dtype) 11 | 12 | def contains(self, x): 13 | return ((x==0) | (x==1)).all() 14 | 15 | def to_jsonable(self, sample_n): 16 | return np.array(sample_n).tolist() 17 | 18 | def from_jsonable(self, sample_n): 19 | return [np.asarray(sample) for sample in sample_n] 20 | 21 | def __repr__(self): 22 | return "MultiBinary({})".format(self.n) 23 | 24 | def __eq__(self, other): 25 | return self.n == other.n 26 | -------------------------------------------------------------------------------- /gym/spaces/prng.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | np_random = numpy.random.RandomState() 4 | 5 | def seed(seed=None): 6 | """Seed the common numpy.random.RandomState used in spaces 7 | 8 | CF 9 | https://github.com/openai/gym/commit/58e6aa95e5af2c738557431f812abb81c505a7cf#commitcomment-17669277 10 | for some details about why we seed the spaces separately from the 11 | envs, but tl;dr is that it's pretty uncommon for them to be used 12 | within an actual algorithm, and the code becomes simpler to just 13 | use this common numpy.random.RandomState. 14 | """ 15 | np_random.seed(seed) 16 | 17 | # This numpy.random.RandomState gets used in all spaces for their 18 | # 'sample' method. It's not really expected that people will be using 19 | # these in their algorithms. 20 | seed(0) 21 | -------------------------------------------------------------------------------- /gym/logger.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from gym.utils import colorize 4 | 5 | DEBUG = 10 6 | INFO = 20 7 | WARN = 30 8 | ERROR = 40 9 | DISABLED = 50 10 | 11 | MIN_LEVEL = 30 12 | 13 | def set_level(level): 14 | """ 15 | Set logging threshold on current logger. 
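For example, set_level(DEBUG) lets debug(), info(), warn() and error() all print, while
set_level(DISABLED) suppresses every message.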
16 | """ 17 | global MIN_LEVEL 18 | MIN_LEVEL = level 19 | 20 | def debug(msg, *args): 21 | if MIN_LEVEL <= DEBUG: 22 | print('%s: %s'%('DEBUG', msg % args)) 23 | 24 | def info(msg, *args): 25 | if MIN_LEVEL <= INFO: 26 | print('%s: %s'%('INFO', msg % args)) 27 | 28 | def warn(msg, *args): 29 | if MIN_LEVEL <= WARN: 30 | warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow')) 31 | 32 | def error(msg, *args): 33 | if MIN_LEVEL <= ERROR: 34 | print(colorize('%s: %s'%('ERROR', msg % args), 'red')) 35 | 36 | # DEPRECATED: 37 | setLevel = set_level 38 | -------------------------------------------------------------------------------- /gym/envs/mujoco/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco.mujoco_env import MujocoEnv 2 | # ^^^^^ so that user gets the correct error 3 | # message if mujoco is not installed correctly 4 | from gym.envs.mujoco.ant import AntEnv 5 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv 6 | from gym.envs.mujoco.hopper import HopperEnv 7 | from gym.envs.mujoco.walker2d import Walker2dEnv 8 | from gym.envs.mujoco.humanoid import HumanoidEnv 9 | from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv 10 | from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv 11 | from gym.envs.mujoco.reacher import ReacherEnv 12 | from gym.envs.mujoco.swimmer import SwimmerEnv 13 | from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv 14 | from gym.envs.mujoco.pusher import PusherEnv 15 | from gym.envs.mujoco.thrower import ThrowerEnv 16 | from gym.envs.mujoco.striker import StrikerEnv 17 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/reach.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'reach.xml') 8 | 9 | 10 | class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.4049, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | } 17 | fetch_env.FetchEnv.__init__( 18 | self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20, 19 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0, 20 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 21 | initial_qpos=initial_qpos, reward_type=reward_type) 22 | utils.EzPickle.__init__(self) 23 | -------------------------------------------------------------------------------- /gym/spaces/discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gym 3 | 4 | class Discrete(gym.Space): 5 | """ 6 | {0,1,...,n-1} 7 | 8 | Example usage: 9 | self.observation_space = spaces.Discrete(2) 10 | """ 11 | def __init__(self, n): 12 | self.n = n 13 | gym.Space.__init__(self, (), np.int64) 14 | 15 | def sample(self): 16 | return gym.spaces.np_random.randint(self.n) 17 | 18 | def contains(self, x): 19 | if isinstance(x, int): 20 | as_int = x 21 | elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.kind in np.typecodes['AllInteger'] and x.shape == ()): 22 | as_int = int(x) 23 | else: 24 | return False 25 | return as_int >= 0 and as_int < self.n 26 | 27 | def __repr__(self): 28 | return "Discrete(%d)" % self.n 29 | 30 | def __eq__(self, 
other): 31 | return self.n == other.n 32 | -------------------------------------------------------------------------------- /gym/envs/tests/spec_list.py: -------------------------------------------------------------------------------- 1 | from gym import envs, logger 2 | import os 3 | 4 | def should_skip_env_spec_for_tests(spec): 5 | # We skip tests for envs that require dependencies or are otherwise 6 | # troublesome to run frequently 7 | ep = spec._entry_point 8 | # Skip mujoco tests for pull request CI 9 | skip_mujoco = not (os.environ.get('MUJOCO_KEY')) 10 | if skip_mujoco and (ep.startswith('gym.envs.mujoco:') or ep.startswith('gym.envs.robotics:')): 11 | return True 12 | if ( 'GoEnv' in ep or 13 | 'HexEnv' in ep or 14 | (ep.startswith("gym.envs.atari") and not spec.id.startswith("Pong") and not spec.id.startswith("Seaquest")) 15 | ): 16 | logger.warn("Skipping tests for env {}".format(ep)) 17 | return True 18 | return False 19 | 20 | spec_list = [spec for spec in sorted(envs.registry.all(), key=lambda x: x.id) if spec._entry_point is not None and not should_skip_env_spec_for_tests(spec)] 21 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/push.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'push.xml') 8 | 9 | 10 | class FetchPushEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | fetch_env.FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, 20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0, 21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/wrappers/dict.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | 5 | __all__ = ['FlattenDictWrapper'] 6 | 7 | 8 | class FlattenDictWrapper(gym.ObservationWrapper): 9 | """Flattens selected keys of a Dict observation space into 10 | an array. 11 | """ 12 | def __init__(self, env, dict_keys): 13 | super(FlattenDictWrapper, self).__init__(env) 14 | self.dict_keys = dict_keys 15 | 16 | # Figure out observation_space dimension. 17 | size = 0 18 | for key in dict_keys: 19 | shape = self.env.observation_space.spaces[key].shape 20 | size += np.prod(shape) 21 | self.observation_space = gym.spaces.Box(-np.inf, np.inf, shape=(size,), dtype='float32') 22 | 23 | def observation(self, observation): 24 | assert isinstance(observation, dict) 25 | obs = [] 26 | for key in self.dict_keys: 27 | obs.append(observation[key].ravel()) 28 | return np.concatenate(obs) 29 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/duplicated_input.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to return every nth character from the input tape. 
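For example, with duplication=2 an input tape reading A A B B C C should yield the target A B C.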
3 | http://arxiv.org/abs/1511.07275 4 | """ 5 | from __future__ import division 6 | from gym.envs.algorithmic import algorithmic_env 7 | 8 | class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | def __init__(self, duplication=2, base=5): 10 | self.duplication = duplication 11 | super(DuplicatedInputEnv, self).__init__(base=base, chars=True) 12 | 13 | def generate_input_data(self, size): 14 | res = [] 15 | if size < self.duplication: 16 | size = self.duplication 17 | for i in range(size//self.duplication): 18 | char = self.np_random.randint(self.base) 19 | for _ in range(self.duplication): 20 | res.append(char) 21 | return res 22 | 23 | def target_from_input_data(self, input_data): 24 | return [input_data[i] for i in range(0, len(input_data), self.duplication)] 25 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/pick_and_place.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'pick_and_place.xml') 8 | 9 | 10 | class FetchPickAndPlaceEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | fetch_env.FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20, 20 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0, 21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/slide.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from gym import utils 5 | from gym.envs.robotics import fetch_env 6 | 7 | 8 | # Ensure we get the path separator correct on windows 9 | MODEL_XML_PATH = os.path.join('fetch', 'slide.xml') 10 | 11 | 12 | class FetchSlideEnv(fetch_env.FetchEnv, utils.EzPickle): 13 | def __init__(self, reward_type='sparse'): 14 | initial_qpos = { 15 | 'robot0:slide0': 0.05, 16 | 'robot0:slide1': 0.48, 17 | 'robot0:slide2': 0.0, 18 | 'object0:joint': [1.7, 1.1, 0.4, 1., 0., 0., 0.], 19 | } 20 | fetch_env.FetchEnv.__init__( 21 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, 22 | gripper_extra_height=-0.02, target_in_the_air=False, target_offset=np.array([0.4, 0.0, 0.0]), 23 | obj_range=0.1, target_range=0.3, distance_threshold=0.05, 24 | initial_qpos=initial_qpos, reward_type=reward_type) 25 | utils.EzPickle.__init__(self) 26 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/reach.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /gym/utils/colorize.py: -------------------------------------------------------------------------------- 1 | """A set of common utilities used within the environments. 
These are 2 | not intended as API functions, and will not remain stable over time. 3 | """ 4 | 5 | color2num = dict( 6 | gray=30, 7 | red=31, 8 | green=32, 9 | yellow=33, 10 | blue=34, 11 | magenta=35, 12 | cyan=36, 13 | white=37, 14 | crimson=38 15 | ) 16 | 17 | 18 | def colorize(string, color, bold=False, highlight = False): 19 | """Return string surrounded by appropriate terminal color codes to 20 | print colorized text. Valid colors: gray, red, green, yellow, 21 | blue, magenta, cyan, white, crimson 22 | """ 23 | 24 | # Import six here so that `utils` has no import-time dependencies. 25 | # We want this since we use `utils` during our import-time sanity checks 26 | # that verify that our dependencies (including six) are actually present. 27 | import six 28 | 29 | attr = [] 30 | num = color2num[color] 31 | if highlight: num += 10 32 | attr.append(six.u(str(num))) 33 | if bold: attr.append(six.u('1')) 34 | attrs = six.u(';').join(attr) 35 | return six.u('\x1b[%sm%s\x1b[0m') % (attrs, string) 36 | -------------------------------------------------------------------------------- /gym/utils/ezpickle.py: -------------------------------------------------------------------------------- 1 | class EzPickle(object): 2 | """Objects that are pickled and unpickled via their constructor 3 | arguments. 4 | 5 | Example usage: 6 | 7 | class Dog(Animal, EzPickle): 8 | def __init__(self, furcolor, tailkind="bushy"): 9 | Animal.__init__() 10 | EzPickle.__init__(furcolor, tailkind) 11 | ... 12 | 13 | When this object is unpickled, a new Dog will be constructed by passing the provided 14 | furcolor and tailkind into the constructor. However, philosophers are still not sure 15 | whether it is still the same dog. 16 | 17 | This is generally needed only for environments which wrap C/C++ code, such as MuJoCo 18 | and Atari. 19 | """ 20 | def __init__(self, *args, **kwargs): 21 | self._ezpickle_args = args 22 | self._ezpickle_kwargs = kwargs 23 | def __getstate__(self): 24 | return {"_ezpickle_args" : self._ezpickle_args, "_ezpickle_kwargs": self._ezpickle_kwargs} 25 | def __setstate__(self, d): 26 | out = type(self)(*d["_ezpickle_args"], **d["_ezpickle_kwargs"]) 27 | self.__dict__.update(out.__dict__) 28 | -------------------------------------------------------------------------------- /gym/wrappers/README.md: -------------------------------------------------------------------------------- 1 | # Wrappers 2 | 3 | Wrappers are used to transform an environment in a modular way: 4 | 5 | ``` 6 | env = gym.make('Pong-v0') 7 | env = MyWrapper(env) 8 | ``` 9 | 10 | Note that we may later restructure any of the files in this directory, 11 | but will keep the wrappers available at the wrappers' top-level 12 | folder. 
So for example, you should access `MyWrapper` as follows: 13 | 14 | ``` 15 | # Will be supported in future releases 16 | from gym.wrappers import MyWrapper 17 | ``` 18 | 19 | ## Quick tips for writing your own wrapper 20 | 21 | - Don't forget to call super(class_name, self).__init__(env) if you override the wrapper's __init__ function 22 | - You can access the inner environment with `self.unwrapped` 23 | - You can access the previous layer using `self.env` 24 | - The variables `metadata`, `action_space`, `observation_space`, `reward_range`, and `spec` are copied to `self` from the previous layer 25 | - Create a wrapped function for at least one of the following: `__init__(self, env)`, `_step`, `_reset`, `_render`, `_close`, or `_seed` 26 | - Your layered function should take its input from the previous layer (`self.env`) and/or the inner layer (`self.unwrapped`) 27 | -------------------------------------------------------------------------------- /gym/envs/mujoco/inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | utils.EzPickle.__init__(self) 8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2) 9 | 10 | def step(self, a): 11 | reward = 1.0 12 | self.do_simulation(a, self.frame_skip) 13 | ob = self._get_obs() 14 | notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2) 15 | done = not notdone 16 | return ob, reward, done, {} 17 | 18 | def reset_model(self): 19 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01) 20 | qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01) 21 | self.set_state(qpos, qvel) 22 | return self._get_obs() 23 | 24 | def _get_obs(self): 25 | return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel() 26 | 27 | def viewer_setup(self): 28 | v = self.viewer 29 | v.cam.trackbodyid = 0 30 | v.cam.distance = self.model.stat.extent 31 | -------------------------------------------------------------------------------- /gym/spaces/multi_discrete.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | class MultiDiscrete(gym.Space): 5 | def __init__(self, nvec): 6 | """ 7 | nvec: vector of counts of each categorical variable 8 | """ 9 | assert (np.array(nvec) > 0).all(), 'nvec (counts) have to be positive' 10 | self.nvec = np.asarray(nvec, dtype=np.uint32) 11 | gym.Space.__init__(self, self.nvec.shape, np.uint32) 12 | 13 | def sample(self): 14 | return (gym.spaces.np_random.random_sample(self.nvec.shape) * self.nvec).astype(self.dtype) 15 | 16 | def contains(self, x): 17 | # if nvec is uint32 and space dtype is uint32, then 0 <= x < self.nvec guarantees that x 18 | # is within correct bounds for space dtype (even though x does not have to be unsigned) 19 | return (0 <= x).all() and (x < self.nvec).all() 20 | 21 | def to_jsonable(self, sample_n): 22 | return [sample.tolist() for sample in sample_n] 23 | 24 | def from_jsonable(self, sample_n): 25 | return np.array(sample_n) 26 | 27 | def __repr__(self): 28 | return "MultiDiscrete({})".format(self.nvec) 29 | 30 | def __eq__(self, other): 31 | return np.all(self.nvec == other.nvec) 32 | -------------------------------------------------------------------------------- /gym/envs/mujoco/swimmer.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, a): 11 | ctrl_cost_coeff = 0.0001 12 | xposbefore = self.sim.data.qpos[0] 13 | self.do_simulation(a, self.frame_skip) 14 | xposafter = self.sim.data.qpos[0] 15 | reward_fwd = (xposafter - xposbefore) / self.dt 16 | reward_ctrl = - ctrl_cost_coeff * np.square(a).sum() 17 | reward = reward_fwd + reward_ctrl 18 | ob = self._get_obs() 19 | return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl) 20 | 21 | def _get_obs(self): 22 | qpos = self.sim.data.qpos 23 | qvel = self.sim.data.qvel 24 | return np.concatenate([qpos.flat[2:], qvel.flat]) 25 | 26 | def reset_model(self): 27 | self.set_state( 28 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq), 29 | self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv) 30 | ) 31 | return self._get_obs() 32 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/reversed_addition.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from gym.envs.algorithmic import algorithmic_env 4 | 5 | class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv): 6 | def __init__(self, rows=2, base=3): 7 | super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False) 8 | 9 | def target_from_input_data(self, input_strings): 10 | curry = 0 11 | target = [] 12 | for digits in input_strings: 13 | total = sum(digits) + curry 14 | target.append(total % self.base) 15 | curry = total // self.base 16 | 17 | if curry > 0: 18 | target.append(curry) 19 | return target 20 | 21 | @property 22 | def time_limit(self): 23 | # Quirk preserved for the sake of consistency: add the length of the input 24 | # rather than the length of the desired output (which may differ if there's 25 | # an extra carried digit). 26 | # TODO: It seems like this time limit is so strict as to make Addition3-v0 27 | # unsolvable, since agents aren't even given enough time steps to look at 28 | # all the digits. (The solutions on the scoreboard seem to only work by 29 | # save-scumming.) 
30 | return self.input_width*2 + 4 31 | -------------------------------------------------------------------------------- /gym/envs/mujoco/half_cheetah.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, action): 11 | xposbefore = self.sim.data.qpos[0] 12 | self.do_simulation(action, self.frame_skip) 13 | xposafter = self.sim.data.qpos[0] 14 | ob = self._get_obs() 15 | reward_ctrl = - 0.1 * np.square(action).sum() 16 | reward_run = (xposafter - xposbefore)/self.dt 17 | reward = reward_ctrl + reward_run 18 | done = False 19 | return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl) 20 | 21 | def _get_obs(self): 22 | return np.concatenate([ 23 | self.sim.data.qpos.flat[1:], 24 | self.sim.data.qvel.flat, 25 | ]) 26 | 27 | def reset_model(self): 28 | qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq) 29 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1 30 | self.set_state(qpos, qvel) 31 | return self._get_obs() 32 | 33 | def viewer_setup(self): 34 | self.viewer.cam.distance = self.model.stat.extent * 0.5 35 | -------------------------------------------------------------------------------- /gym/spaces/tuple_space.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | class Tuple(gym.Space): 4 | """ 5 | A tuple (i.e., product) of simpler spaces 6 | 7 | Example usage: 8 | self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3))) 9 | """ 10 | def __init__(self, spaces): 11 | self.spaces = spaces 12 | gym.Space.__init__(self, None, None) 13 | 14 | def sample(self): 15 | return tuple([space.sample() for space in self.spaces]) 16 | 17 | def contains(self, x): 18 | if isinstance(x, list): 19 | x = tuple(x) # Promote list to tuple for contains check 20 | return isinstance(x, tuple) and len(x) == len(self.spaces) and all( 21 | space.contains(part) for (space,part) in zip(self.spaces,x)) 22 | 23 | def __repr__(self): 24 | return "Tuple(" + ", ". 
join([str(s) for s in self.spaces]) + ")" 25 | 26 | def to_jsonable(self, sample_n): 27 | # serialize as list-repr of tuple of vectors 28 | return [space.to_jsonable([sample[i] for sample in sample_n]) \ 29 | for i, space in enumerate(self.spaces)] 30 | 31 | def from_jsonable(self, sample_n): 32 | return [sample for sample in zip(*[space.from_jsonable(sample_n[i]) for i, space in enumerate(self.spaces)])] 33 | 34 | def __eq__(self, other): 35 | return self.spaces == other.spaces 36 | -------------------------------------------------------------------------------- /test.dockerfile.14.04: -------------------------------------------------------------------------------- 1 | # A Dockerfile that sets up a full Gym install with test dependencies 2 | FROM ubuntu:14.04 3 | # Note that latest version of mujoco-py, 1.5, does not play nicely with ubuntu 14.04 - 4 | # requires patchelf system package not available on 14.04 5 | 6 | # Install keyboard-configuration separately to avoid travis hanging waiting for keyboard selection 7 | RUN \ 8 | apt-get -y update && \ 9 | apt-get install -y keyboard-configuration && \ 10 | 11 | apt-get install -y \ 12 | python-setuptools \ 13 | python-pip \ 14 | python3-dev \ 15 | libjpeg-dev \ 16 | cmake \ 17 | swig \ 18 | python-pyglet \ 19 | python3-opengl \ 20 | libboost-all-dev \ 21 | libsdl2-2.0.0 \ 22 | libsdl2-dev \ 23 | libglu1-mesa \ 24 | libglu1-mesa-dev \ 25 | libgles2-mesa-dev \ 26 | xvfb \ 27 | libav-tools \ 28 | freeglut3 \ 29 | wget \ 30 | unzip && \ 31 | 32 | apt-get clean && \ 33 | rm -rf /var/lib/apt/lists/* && \ 34 | pip install tox 35 | 36 | 37 | ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mjpro150/bin 38 | 39 | # install dependencies 40 | COPY . /usr/local/gym/ 41 | RUN cd /usr/local/gym && \ 42 | tox --notest 43 | 44 | WORKDIR /usr/local/gym/ 45 | ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"] 46 | CMD ["tox"] 47 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/push.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /test.dockerfile.18.04: -------------------------------------------------------------------------------- 1 | # A Dockerfile that sets up a full Gym install with test dependencies 2 | FROM ubuntu:18.04 3 | 4 | # Install keyboard-configuration separately to avoid travis hanging waiting for keyboard selection 5 | RUN \ 6 | apt -y update && \ 7 | apt install -y keyboard-configuration && \ 8 | 9 | apt install -y \ 10 | python-setuptools \ 11 | python-pip \ 12 | python3-dev \ 13 | python-pyglet \ 14 | python3-opengl \ 15 | libjpeg-dev \ 16 | libboost-all-dev \ 17 | libsdl2-dev \ 18 | libosmesa6-dev \ 19 | patchelf \ 20 | ffmpeg \ 21 | xvfb \ 22 | wget \ 23 | unzip && \ 24 | 25 | apt clean && \ 26 | rm -rf /var/lib/apt/lists/* && \ 27 | pip install tox && \ 28 | 29 | # Download mujoco 30 | mkdir /root/.mujoco && \ 31 | cd /root/.mujoco && \ 32 | wget https://www.roboti.us/download/mjpro150_linux.zip && \ 33 | unzip mjpro150_linux.zip 34 | 35 | ARG MUJOCO_KEY 36 | ENV MUJOCO_KEY=$MUJOCO_KEY 37 | ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mjpro150/bin 38 | RUN echo $MUJOCO_KEY | base64 --decode > /root/.mujoco/mjkey.txt 39 | 40 | # install dependencies 41 | COPY . 
/usr/local/gym/ 42 | RUN cd /usr/local/gym && \ 43 | tox --notest 44 | 45 | WORKDIR /usr/local/gym/ 46 | ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"] 47 | CMD ["tox"] 48 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/inverted_pendulum.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/slide.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /gym/utils/reraise.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # We keep the actual reraising in different modules, since the 4 | # reraising code uses syntax mutually exclusive to Python 2/3. 5 | if sys.version_info[0] < 3: 6 | from .reraise_impl_py2 import reraise_impl #pylint: disable=E0401 7 | else: 8 | from .reraise_impl_py3 import reraise_impl 9 | 10 | def reraise(prefix=None, suffix=None): 11 | old_exc_type, old_exc_value, traceback = sys.exc_info() 12 | if old_exc_value is None: 13 | old_exc_value = old_exc_type() 14 | 15 | e = ReraisedException(old_exc_value, prefix, suffix) 16 | 17 | reraise_impl(e, traceback) 18 | 19 | # http://stackoverflow.com/a/13653312 20 | def full_class_name(o): 21 | module = o.__class__.__module__ 22 | if module is None or module == str.__class__.__module__: 23 | return o.__class__.__name__ 24 | return module + '.' 
+ o.__class__.__name__ 25 | 26 | class ReraisedException(Exception): 27 | def __init__(self, old_exc, prefix, suffix): 28 | self.old_exc = old_exc 29 | self.prefix = prefix 30 | self.suffix = suffix 31 | 32 | def __str__(self): 33 | klass = self.old_exc.__class__ 34 | 35 | orig = "%s: %s" % (full_class_name(self.old_exc), klass.__str__(self.old_exc)) 36 | prefixpart = suffixpart = '' 37 | if self.prefix is not None: 38 | prefixpart = self.prefix + "\n" 39 | if self.suffix is not None: 40 | suffixpart = "\n\n" + self.suffix 41 | return "%sThe original exception was:\n\n%s%s" % (prefixpart, orig, suffixpart) 42 | -------------------------------------------------------------------------------- /gym/envs/mujoco/walker2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4) 9 | utils.EzPickle.__init__(self) 10 | 11 | def step(self, a): 12 | posbefore = self.sim.data.qpos[0] 13 | self.do_simulation(a, self.frame_skip) 14 | posafter, height, ang = self.sim.data.qpos[0:3] 15 | alive_bonus = 1.0 16 | reward = ((posafter - posbefore) / self.dt) 17 | reward += alive_bonus 18 | reward -= 1e-3 * np.square(a).sum() 19 | done = not (height > 0.8 and height < 2.0 and 20 | ang > -1.0 and ang < 1.0) 21 | ob = self._get_obs() 22 | return ob, reward, done, {} 23 | 24 | def _get_obs(self): 25 | qpos = self.sim.data.qpos 26 | qvel = self.sim.data.qvel 27 | return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel() 28 | 29 | def reset_model(self): 30 | self.set_state( 31 | self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq), 32 | self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) 33 | ) 34 | return self._get_obs() 35 | 36 | def viewer_setup(self): 37 | self.viewer.cam.trackbodyid = 2 38 | self.viewer.cam.distance = self.model.stat.extent * 0.5 39 | self.viewer.cam.lookat[2] = 1.15 40 | self.viewer.cam.elevation = -20 41 | -------------------------------------------------------------------------------- /gym/envs/toy_text/roulette.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | 6 | class RouletteEnv(gym.Env): 7 | """Simple roulette environment 8 | 9 | The roulette wheel has 37 spots. If the bet is 0 and a 0 comes up, 10 | you win a reward of 35. If the parity of your bet matches the parity 11 | of the spin, you win 1. Otherwise you receive a reward of -1. 12 | 13 | The long run reward for playing 0 should be -1/37 for any state 14 | 15 | The last action (38) stops the rollout for a return of 0 (walking away) 16 | """ 17 | def __init__(self, spots=37): 18 | self.n = spots + 1 19 | self.action_space = spaces.Discrete(self.n) 20 | self.observation_space = spaces.Discrete(1) 21 | self.seed() 22 | 23 | def seed(self, seed=None): 24 | self.np_random, seed = seeding.np_random(seed) 25 | return [seed] 26 | 27 | def step(self, action): 28 | assert self.action_space.contains(action) 29 | if action == self.n - 1: 30 | # observation, reward, done, info 31 | return 0, 0, True, {} 32 | 33 | # N.B. 
np.random.randint draws from [A, B) while random.randint draws from [A,B] 34 | val = self.np_random.randint(0, self.n - 1) 35 | if val == action == 0: 36 | reward = self.n - 2.0 37 | elif val != 0 and action != 0 and val % 2 == action % 2: 38 | reward = 1.0 39 | else: 40 | reward = -1.0 41 | return 0, reward, False, {} 42 | 43 | def reset(self): 44 | return 0 45 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # gym 2 | 3 | The MIT License 4 | 5 | Copyright (c) 2016 OpenAI (https://openai.com) 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | 25 | # Mujoco models 26 | This work is derived from [MuJuCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license: 27 | ``` 28 | This file is part of MuJoCo. 29 | Copyright 2009-2015 Roboti LLC. 
30 | Mujoco :: Advanced physics simulation engine 31 | Source : www.roboti.us 32 | Version : 1.31 33 | Released : 23Apr16 34 | Author :: Vikash Kumar 35 | Contacts : kumar@roboti.us 36 | ``` 37 | -------------------------------------------------------------------------------- /gym/envs/tests/test_registration.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from gym import error, envs 3 | from gym.envs import registration 4 | from gym.envs.classic_control import cartpole 5 | 6 | def test_make(): 7 | env = envs.make('CartPole-v0') 8 | assert env.spec.id == 'CartPole-v0' 9 | assert isinstance(env.unwrapped, cartpole.CartPoleEnv) 10 | 11 | def test_make_deprecated(): 12 | try: 13 | envs.make('Humanoid-v0') 14 | except error.Error: 15 | pass 16 | else: 17 | assert False 18 | 19 | def test_spec(): 20 | spec = envs.spec('CartPole-v0') 21 | assert spec.id == 'CartPole-v0' 22 | 23 | def test_missing_lookup(): 24 | registry = registration.EnvRegistry() 25 | registry.register(id='Test-v0', entry_point=None) 26 | registry.register(id='Test-v15', entry_point=None) 27 | registry.register(id='Test-v9', entry_point=None) 28 | registry.register(id='Other-v100', entry_point=None) 29 | try: 30 | registry.spec('Test-v1') # must match an env name but not the version above 31 | except error.DeprecatedEnv: 32 | pass 33 | else: 34 | assert False 35 | 36 | try: 37 | registry.spec('Unknown-v1') 38 | except error.UnregisteredEnv: 39 | pass 40 | else: 41 | assert False 42 | 43 | def test_malformed_lookup(): 44 | registry = registration.EnvRegistry() 45 | try: 46 | registry.spec(u'“Breakout-v0”') 47 | except error.Error as e: 48 | assert 'malformed environment ID' in '{}'.format(e), 'Unexpected message: {}'.format(e) 49 | else: 50 | assert False 51 | -------------------------------------------------------------------------------- /gym/envs/mujoco/hopper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, a): 11 | posbefore = self.sim.data.qpos[0] 12 | self.do_simulation(a, self.frame_skip) 13 | posafter, height, ang = self.sim.data.qpos[0:3] 14 | alive_bonus = 1.0 15 | reward = (posafter - posbefore) / self.dt 16 | reward += alive_bonus 17 | reward -= 1e-3 * np.square(a).sum() 18 | s = self.state_vector() 19 | done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and 20 | (height > .7) and (abs(ang) < .2)) 21 | ob = self._get_obs() 22 | return ob, reward, done, {} 23 | 24 | def _get_obs(self): 25 | return np.concatenate([ 26 | self.sim.data.qpos.flat[1:], 27 | np.clip(self.sim.data.qvel.flat, -10, 10) 28 | ]) 29 | 30 | def reset_model(self): 31 | qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq) 32 | qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) 33 | self.set_state(qpos, qvel) 34 | return self._get_obs() 35 | 36 | def viewer_setup(self): 37 | self.viewer.cam.trackbodyid = 2 38 | self.viewer.cam.distance = self.model.stat.extent * 0.75 39 | self.viewer.cam.lookat[2] = 1.15 40 | self.viewer.cam.elevation = -20 41 | -------------------------------------------------------------------------------- /test.dockerfile.16.04: 
-------------------------------------------------------------------------------- 1 | # A Dockerfile that sets up a full Gym install with test dependencies 2 | FROM ubuntu:16.04 3 | 4 | # Install keyboard-configuration separately to avoid travis hanging waiting for keyboard selection 5 | RUN \ 6 | apt-get -y update && \ 7 | apt-get install -y keyboard-configuration && \ 8 | 9 | # Maybe Install python3.6 on ubuntu 16.04 ? 10 | # apt-get install -y software-properties-common && \ 11 | # add-apt-repository -y ppa:jonathonf/python-3.6 && \ 12 | # apt-get -y update && \ 13 | # apt-get -y install python3.6 python3.6-distutils python3.6-dev 14 | 15 | apt-get install -y \ 16 | python-setuptools \ 17 | python-pip \ 18 | python3-dev \ 19 | python-pyglet \ 20 | python3-opengl \ 21 | libjpeg-dev \ 22 | libboost-all-dev \ 23 | libsdl2-dev \ 24 | libosmesa6-dev \ 25 | patchelf \ 26 | xvfb \ 27 | ffmpeg \ 28 | wget \ 29 | unzip && \ 30 | 31 | apt-get clean && \ 32 | rm -rf /var/lib/apt/lists/* && \ 33 | pip install tox && \ 34 | 35 | # Download mujoco 36 | mkdir /root/.mujoco && \ 37 | cd /root/.mujoco && \ 38 | wget https://www.roboti.us/download/mjpro150_linux.zip && \ 39 | unzip mjpro150_linux.zip 40 | 41 | ARG MUJOCO_KEY 42 | ENV MUJOCO_KEY=$MUJOCO_KEY 43 | ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mjpro150/bin 44 | 45 | RUN echo $MUJOCO_KEY | base64 --decode > /root/.mujoco/mjkey.txt 46 | 47 | # install dependencies 48 | COPY . /usr/local/gym/ 49 | RUN cd /usr/local/gym && \ 50 | tox --notest 51 | 52 | WORKDIR /usr/local/gym/ 53 | ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"] 54 | CMD ["tox"] 55 | -------------------------------------------------------------------------------- /gym/envs/mujoco/inverted_double_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5) 9 | utils.EzPickle.__init__(self) 10 | 11 | def step(self, action): 12 | self.do_simulation(action, self.frame_skip) 13 | ob = self._get_obs() 14 | x, _, y = self.sim.data.site_xpos[0] 15 | dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2 16 | v1, v2 = self.sim.data.qvel[1:3] 17 | vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 18 | alive_bonus = 10 19 | r = alive_bonus - dist_penalty - vel_penalty 20 | done = bool(y <= 1) 21 | return ob, r, done, {} 22 | 23 | def _get_obs(self): 24 | return np.concatenate([ 25 | self.sim.data.qpos[:1], # cart x pos 26 | np.sin(self.sim.data.qpos[1:]), # link angles 27 | np.cos(self.sim.data.qpos[1:]), 28 | np.clip(self.sim.data.qvel, -10, 10), 29 | np.clip(self.sim.data.qfrc_constraint, -10, 10) 30 | ]).ravel() 31 | 32 | def reset_model(self): 33 | self.set_state( 34 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq), 35 | self.init_qvel + self.np_random.randn(self.model.nv) * .1 36 | ) 37 | return self._get_obs() 38 | 39 | def viewer_setup(self): 40 | v = self.viewer 41 | v.cam.trackbodyid = 0 42 | v.cam.distance = self.model.stat.extent * 0.5 43 | v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2] 44 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/pick_and_place.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 
13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /gym/envs/mujoco/reacher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | utils.EzPickle.__init__(self) 8 | mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2) 9 | 10 | def step(self, a): 11 | vec = self.get_body_com("fingertip")-self.get_body_com("target") 12 | reward_dist = - np.linalg.norm(vec) 13 | reward_ctrl = - np.square(a).sum() 14 | reward = reward_dist + reward_ctrl 15 | self.do_simulation(a, self.frame_skip) 16 | ob = self._get_obs() 17 | done = False 18 | return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) 19 | 20 | def viewer_setup(self): 21 | self.viewer.cam.trackbodyid = 0 22 | 23 | def reset_model(self): 24 | qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos 25 | while True: 26 | self.goal = self.np_random.uniform(low=-.2, high=.2, size=2) 27 | if np.linalg.norm(self.goal) < 2: 28 | break 29 | qpos[-2:] = self.goal 30 | qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) 31 | qvel[-2:] = 0 32 | self.set_state(qpos, qvel) 33 | return self._get_obs() 34 | 35 | def _get_obs(self): 36 | theta = self.sim.data.qpos.flat[:2] 37 | return np.concatenate([ 38 | np.cos(theta), 39 | np.sin(theta), 40 | self.sim.data.qpos.flat[2:], 41 | self.sim.data.qvel.flat[:2], 42 | self.get_body_com("fingertip") - self.get_body_com("target") 43 | ]) 44 | -------------------------------------------------------------------------------- /gym/wrappers/time_limit.py: -------------------------------------------------------------------------------- 1 | import time 2 | from gym import Wrapper, logger 3 | 4 | class TimeLimit(Wrapper): 5 | def __init__(self, env, max_episode_seconds=None, max_episode_steps=None): 6 | super(TimeLimit, self).__init__(env) 7 | self._max_episode_seconds = max_episode_seconds 8 | self._max_episode_steps = max_episode_steps 9 | 10 | self._elapsed_steps = 0 11 | self._episode_started_at = None 12 | 13 | @property 14 | def _elapsed_seconds(self): 15 | return time.time() - self._episode_started_at 16 | 17 | def _past_limit(self): 18 | """Return true if we are past our limit""" 19 | if self._max_episode_steps is not None and self._max_episode_steps <= self._elapsed_steps: 20 | logger.debug("Env has passed the step limit defined by TimeLimit.") 21 | return True 22 | 23 | if self._max_episode_seconds is not None and self._max_episode_seconds <= self._elapsed_seconds: 24 | logger.debug("Env has passed the seconds limit defined by TimeLimit.") 25 | return True 26 | 27 | return False 28 | 29 | def step(self, action): 30 | assert self._episode_started_at is not None, "Cannot call env.step() before calling reset()" 31 | observation, reward, done, info = self.env.step(action) 32 | self._elapsed_steps += 1 33 | 34 | if self._past_limit(): 35 | if self.metadata.get('semantics.autoreset'): 36 | _ = self.reset() # automatically reset the env 37 | done = True 38 | 39 | return observation, reward, done, info 40 | 41 | def reset(self): 42 | self._episode_started_at = time.time() 43 | self._elapsed_steps = 0 44 | return self.env.reset() 45 | 
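As a quick illustration of how the `TimeLimit` wrapper above is meant to be used (a minimal sketch: `gym.make` normally applies this wrapper for you from the registry's spec, and the limits shown here are illustrative values, not defaults):

```python
import gym
from gym.wrappers.time_limit import TimeLimit

# Wrap a bare environment so an episode ends after at most 200 steps or
# 60 wall-clock seconds, whichever limit is hit first (illustrative values).
env = TimeLimit(gym.make('CartPole-v0').unwrapped,
                max_episode_steps=200, max_episode_seconds=60)

ob = env.reset()
done = False
while not done:
    ob, reward, done, info = env.step(env.action_space.sample())
env.close()
```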
-------------------------------------------------------------------------------- /gym/envs/toy_text/discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym import Env, spaces 4 | from gym.utils import seeding 5 | 6 | def categorical_sample(prob_n, np_random): 7 | """ 8 | Sample from categorical distribution 9 | Each row specifies class probabilities 10 | """ 11 | prob_n = np.asarray(prob_n) 12 | csprob_n = np.cumsum(prob_n) 13 | return (csprob_n > np_random.rand()).argmax() 14 | 15 | 16 | class DiscreteEnv(Env): 17 | 18 | """ 19 | Has the following members 20 | - nS: number of states 21 | - nA: number of actions 22 | - P: transitions (*) 23 | - isd: initial state distribution (**) 24 | 25 | (*) dictionary dict of dicts of lists, where 26 | P[s][a] == [(probability, nextstate, reward, done), ...] 27 | (**) list or array of length nS 28 | 29 | 30 | """ 31 | def __init__(self, nS, nA, P, isd): 32 | self.P = P 33 | self.isd = isd 34 | self.lastaction=None # for rendering 35 | self.nS = nS 36 | self.nA = nA 37 | 38 | self.action_space = spaces.Discrete(self.nA) 39 | self.observation_space = spaces.Discrete(self.nS) 40 | 41 | self.seed() 42 | self.reset() 43 | 44 | def seed(self, seed=None): 45 | self.np_random, seed = seeding.np_random(seed) 46 | return [seed] 47 | 48 | def reset(self): 49 | self.s = categorical_sample(self.isd, self.np_random) 50 | self.lastaction=None 51 | return self.s 52 | 53 | def step(self, a): 54 | transitions = self.P[self.s][a] 55 | i = categorical_sample([t[0] for t in transitions], self.np_random) 56 | p, s, r, d= transitions[i] 57 | self.s = s 58 | self.lastaction=a 59 | return (s, r, d, {"prob" : p}) 60 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import sys, os.path 3 | 4 | # Don't import gym module here, since deps may not be installed 5 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'gym')) 6 | from version import VERSION 7 | 8 | # Environment-specific dependencies. 9 | extras = { 10 | 'atari': ['atari_py>=0.1.4', 'Pillow', 'PyOpenGL'], 11 | 'box2d': ['box2d-py>=2.3.5'], 12 | 'classic_control': ['PyOpenGL'], 13 | 'mujoco': ['mujoco_py>=1.50', 'imageio'], 14 | 'robotics': ['mujoco_py>=1.50', 'imageio'], 15 | } 16 | 17 | # Meta dependency groups. 
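# The 'all' extra assembled below aggregates every environment-specific extra,
# so users can pull in all optional dependencies at once, e.g. with
# `pip install -e .[all]` from a source checkout.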
18 | all_deps = [] 19 | for group_name in extras: 20 | all_deps += extras[group_name] 21 | extras['all'] = all_deps 22 | 23 | setup(name='gym', 24 | version=VERSION, 25 | description='The OpenAI Gym: A toolkit for developing and comparing your reinforcement learning agents.', 26 | url='https://github.com/openai/gym', 27 | author='OpenAI', 28 | author_email='gym@openai.com', 29 | license='', 30 | packages=[package for package in find_packages() 31 | if package.startswith('gym')], 32 | zip_safe=False, 33 | install_requires=[ 34 | 'scipy', 'numpy>=1.10.4', 'requests>=2.0', 'six', 'pyglet>=1.2.0', 35 | ], 36 | extras_require=extras, 37 | package_data={'gym': [ 38 | 'envs/mujoco/assets/*.xml', 39 | 'envs/classic_control/assets/*.png', 40 | 'envs/robotics/assets/LICENSE.md', 41 | 'envs/robotics/assets/fetch/*.xml', 42 | 'envs/robotics/assets/hand/*.xml', 43 | 'envs/robotics/assets/stls/fetch/*.stl', 44 | 'envs/robotics/assets/stls/hand/*.stl', 45 | 'envs/robotics/assets/textures/*.png'] 46 | }, 47 | tests_require=['pytest', 'mock'], 48 | ) 49 | -------------------------------------------------------------------------------- /gym/wrappers/monitoring/tests/test_video_recorder.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import tempfile 5 | import numpy as np 6 | 7 | import gym 8 | from gym.wrappers.monitoring.video_recorder import VideoRecorder 9 | 10 | class BrokenRecordableEnv(object): 11 | metadata = {'render.modes': [None, 'rgb_array']} 12 | 13 | def render(self, mode=None): 14 | pass 15 | 16 | class UnrecordableEnv(object): 17 | metadata = {'render.modes': [None]} 18 | 19 | def render(self, mode=None): 20 | pass 21 | 22 | def test_record_simple(): 23 | env = gym.make("CartPole-v1") 24 | rec = VideoRecorder(env) 25 | env.reset() 26 | rec.capture_frame() 27 | rec.close() 28 | assert not rec.empty 29 | assert not rec.broken 30 | assert os.path.exists(rec.path) 31 | f = open(rec.path) 32 | assert os.fstat(f.fileno()).st_size > 100 33 | 34 | def test_no_frames(): 35 | env = BrokenRecordableEnv() 36 | rec = VideoRecorder(env) 37 | rec.close() 38 | assert rec.empty 39 | assert rec.functional 40 | assert not os.path.exists(rec.path) 41 | 42 | def test_record_unrecordable_method(): 43 | env = UnrecordableEnv() 44 | rec = VideoRecorder(env) 45 | assert not rec.enabled 46 | rec.close() 47 | 48 | def test_record_breaking_render_method(): 49 | env = BrokenRecordableEnv() 50 | rec = VideoRecorder(env) 51 | rec.capture_frame() 52 | rec.close() 53 | assert rec.empty 54 | assert rec.broken 55 | assert not os.path.exists(rec.path) 56 | 57 | def test_text_envs(): 58 | env = gym.make('FrozenLake-v0') 59 | video = VideoRecorder(env) 60 | try: 61 | env.reset() 62 | video.capture_frame() 63 | video.close() 64 | finally: 65 | os.remove(video.path) 66 | -------------------------------------------------------------------------------- /gym/envs/mujoco/ant.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, a): 11 | xposbefore = self.get_body_com("torso")[0] 12 | self.do_simulation(a, self.frame_skip) 13 | xposafter = self.get_body_com("torso")[0] 14 | forward_reward = (xposafter - xposbefore)/self.dt 15 | 
ctrl_cost = .5 * np.square(a).sum() 16 | contact_cost = 0.5 * 1e-3 * np.sum( 17 | np.square(np.clip(self.sim.data.cfrc_ext, -1, 1))) 18 | survive_reward = 1.0 19 | reward = forward_reward - ctrl_cost - contact_cost + survive_reward 20 | state = self.state_vector() 21 | notdone = np.isfinite(state).all() \ 22 | and state[2] >= 0.2 and state[2] <= 1.0 23 | done = not notdone 24 | ob = self._get_obs() 25 | return ob, reward, done, dict( 26 | reward_forward=forward_reward, 27 | reward_ctrl=-ctrl_cost, 28 | reward_contact=-contact_cost, 29 | reward_survive=survive_reward) 30 | 31 | def _get_obs(self): 32 | return np.concatenate([ 33 | self.sim.data.qpos.flat[2:], 34 | self.sim.data.qvel.flat, 35 | np.clip(self.sim.data.cfrc_ext, -1, 1).flat, 36 | ]) 37 | 38 | def reset_model(self): 39 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1) 40 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1 41 | self.set_state(qpos, qvel) 42 | return self._get_obs() 43 | 44 | def viewer_setup(self): 45 | self.viewer.cam.distance = self.model.stat.extent * 0.5 46 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/reach.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/point.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 32 | -------------------------------------------------------------------------------- /gym/envs/mujoco/humanoidstandup.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco import mujoco_env 2 | from gym import utils 3 | import numpy as np 4 | 5 | class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5) 8 | utils.EzPickle.__init__(self) 9 | 10 | def _get_obs(self): 11 | data = self.sim.data 12 | return np.concatenate([data.qpos.flat[2:], 13 | data.qvel.flat, 14 | data.cinert.flat, 15 | data.cvel.flat, 16 | data.qfrc_actuator.flat, 17 | data.cfrc_ext.flat]) 18 | 19 | def step(self, a): 20 | self.do_simulation(a, self.frame_skip) 21 | pos_after = self.sim.data.qpos[2] 22 | data = self.sim.data 23 | uph_cost = (pos_after - 0) / self.model.opt.timestep 24 | 25 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum() 26 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum() 27 | quad_impact_cost = min(quad_impact_cost, 10) 28 | reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1 29 | 30 | done = bool(False) 31 | return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost) 32 | 33 | def reset_model(self): 34 | c = 0.01 35 | self.set_state( 36 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq), 37 | self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,) 38 | ) 39 | return self._get_obs() 40 | 41 | def viewer_setup(self): 42 | self.viewer.cam.trackbodyid = 1 43 | self.viewer.cam.distance = self.model.stat.extent * 1.0 44 | self.viewer.cam.lookat[2] = 0.8925 45 | self.viewer.cam.elevation = -20 46 | -------------------------------------------------------------------------------- 
/examples/agents/random_agent.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | import gym 5 | from gym import wrappers, logger 6 | 7 | class RandomAgent(object): 8 | """The world's simplest agent!""" 9 | def __init__(self, action_space): 10 | self.action_space = action_space 11 | 12 | def act(self, observation, reward, done): 13 | return self.action_space.sample() 14 | 15 | if __name__ == '__main__': 16 | parser = argparse.ArgumentParser(description=None) 17 | parser.add_argument('env_id', nargs='?', default='CartPole-v0', help='Select the environment to run') 18 | args = parser.parse_args() 19 | 20 | # You can set the level to logger.DEBUG or logger.WARN if you 21 | # want to change the amount of output. 22 | logger.set_level(logger.INFO) 23 | 24 | env = gym.make(args.env_id) 25 | 26 | # You provide the directory to write to (can be an existing 27 | # directory, including one with existing data -- all monitor files 28 | # will be namespaced). You can also dump to a tempdir if you'd 29 | # like: tempfile.mkdtemp(). 30 | outdir = '/tmp/random-agent-results' 31 | env = wrappers.Monitor(env, directory=outdir, force=True) 32 | env.seed(0) 33 | agent = RandomAgent(env.action_space) 34 | 35 | episode_count = 100 36 | reward = 0 37 | done = False 38 | 39 | for i in range(episode_count): 40 | ob = env.reset() 41 | while True: 42 | action = agent.act(ob, reward, done) 43 | ob, reward, done, _ = env.step(action) 44 | if done: 45 | break 46 | # Note there's no env.render() here. But the environment still can open window and 47 | # render if asked by env.monitor: it calls env.render('rgb_array') to record video. 48 | # Video is not recorded every episode, see capped_cubic_video_schedule for details. 49 | 50 | # Close the env and write monitor result info to disk 51 | env.close() 52 | -------------------------------------------------------------------------------- /gym/envs/tests/test_envs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from gym import envs 5 | from gym.envs.tests.spec_list import spec_list 6 | 7 | # This runs a smoketest on each official registered env. We may want 8 | # to try also running environments which are not officially registered 9 | # envs. 10 | @pytest.mark.parametrize("spec", spec_list) 11 | def test_env(spec): 12 | # Capture warnings 13 | with pytest.warns(None) as warnings: 14 | env = spec.make() 15 | 16 | # Check that dtype is explicitly declared for gym.Box spaces 17 | for warning_msg in warnings: 18 | assert not 'autodetected dtype' in str(warning_msg.message) 19 | 20 | ob_space = env.observation_space 21 | act_space = env.action_space 22 | ob = env.reset() 23 | assert ob_space.contains(ob), 'Reset observation: {!r} not in space'.format(ob) 24 | a = act_space.sample() 25 | observation, reward, done, _info = env.step(a) 26 | assert ob_space.contains(observation), 'Step observation: {!r} not in space'.format(observation) 27 | assert np.isscalar(reward), "{} is not a scalar for {}".format(reward, env) 28 | assert isinstance(done, bool), "Expected {} to be a boolean".format(done) 29 | 30 | for mode in env.metadata.get('render.modes', []): 31 | env.render(mode=mode) 32 | 33 | # Make sure we can render the environment after close. 
34 | for mode in env.metadata.get('render.modes', []): 35 | env.render(mode=mode) 36 | 37 | env.close() 38 | 39 | # Run a longer rollout on some environments 40 | def test_random_rollout(): 41 | for env in [envs.make('CartPole-v0'), envs.make('FrozenLake-v0')]: 42 | agent = lambda ob: env.action_space.sample() 43 | ob = env.reset() 44 | for _ in range(10): 45 | assert env.observation_space.contains(ob) 46 | a = agent(ob) 47 | assert env.action_space.contains(a) 48 | (ob, _reward, done, _info) = env.step(a) 49 | if done: break 50 | env.close() 51 | -------------------------------------------------------------------------------- /gym/spaces/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | from gym import logger 5 | 6 | class Box(gym.Space): 7 | """ 8 | A box in R^n. 9 | I.e., each coordinate is bounded. 10 | 11 | Example usage: 12 | self.action_space = spaces.Box(low=-10, high=10, shape=(1,)) 13 | """ 14 | def __init__(self, low=None, high=None, shape=None, dtype=None): 15 | """ 16 | Two kinds of valid input: 17 | Box(low=-1.0, high=1.0, shape=(3,4)) # low and high are scalars, and shape is provided 18 | Box(low=np.array([-1.0,-2.0]), high=np.array([2.0,4.0])) # low and high are arrays of the same shape 19 | """ 20 | if shape is None: 21 | assert low.shape == high.shape 22 | shape = low.shape 23 | else: 24 | assert np.isscalar(low) and np.isscalar(high) 25 | low = low + np.zeros(shape) 26 | high = high + np.zeros(shape) 27 | if dtype is None: # Autodetect type 28 | if (high == 255).all(): 29 | dtype = np.uint8 30 | else: 31 | dtype = np.float32 32 | logger.warn("gym.spaces.Box autodetected dtype as {}. Please provide explicit dtype.".format(dtype)) 33 | self.low = low.astype(dtype) 34 | self.high = high.astype(dtype) 35 | gym.Space.__init__(self, shape, dtype) 36 | 37 | def sample(self): 38 | return gym.spaces.np_random.uniform(low=self.low, high=self.high + (0 if self.dtype.kind == 'f' else 1), size=self.low.shape).astype(self.dtype) 39 | 40 | def contains(self, x): 41 | return x.shape == self.shape and (x >= self.low).all() and (x <= self.high).all() 42 | 43 | def to_jsonable(self, sample_n): 44 | return np.array(sample_n).tolist() 45 | 46 | def from_jsonable(self, sample_n): 47 | return [np.asarray(sample) for sample in sample_n] 48 | 49 | def __repr__(self): 50 | return "Box" + str(self.shape) 51 | 52 | def __eq__(self, other): 53 | return np.allclose(self.low, other.low) and np.allclose(self.high, other.high) 54 | -------------------------------------------------------------------------------- /gym/envs/robotics/hand_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import numpy as np 4 | 5 | import gym 6 | from gym import error, spaces 7 | from gym.utils import seeding 8 | from gym.envs.robotics import robot_env 9 | 10 | 11 | class HandEnv(robot_env.RobotEnv): 12 | def __init__(self, model_path, n_substeps, initial_qpos, relative_control): 13 | self.relative_control = relative_control 14 | 15 | super(HandEnv, self).__init__( 16 | model_path=model_path, n_substeps=n_substeps, n_actions=20, 17 | initial_qpos=initial_qpos) 18 | 19 | # RobotEnv methods 20 | # ---------------------------- 21 | 22 | def _set_action(self, action): 23 | assert action.shape == (20,) 24 | 25 | ctrlrange = self.sim.model.actuator_ctrlrange 26 | actuation_range = (ctrlrange[:, 1] - ctrlrange[:, 0]) / 2. 
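# The action is applied below as a scaled offset around an 'actuation center':
# the current joint positions when relative_control is set, otherwise the
# midpoint of each actuator's control range; the result is then clipped back
# into the valid control range.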
27 | if self.relative_control: 28 | actuation_center = np.zeros_like(action) 29 | for i in range(self.sim.data.ctrl.shape[0]): 30 | actuation_center[i] = self.sim.data.get_joint_qpos( 31 | self.sim.model.actuator_names[i].replace(':A_', ':')) 32 | for joint_name in ['FF', 'MF', 'RF', 'LF']: 33 | act_idx = self.sim.model.actuator_name2id( 34 | 'robot0:A_{}J1'.format(joint_name)) 35 | actuation_center[act_idx] += self.sim.data.get_joint_qpos( 36 | 'robot0:{}J0'.format(joint_name)) 37 | else: 38 | actuation_center = (ctrlrange[:, 1] + ctrlrange[:, 0]) / 2. 39 | self.sim.data.ctrl[:] = actuation_center + action * actuation_range 40 | self.sim.data.ctrl[:] = np.clip(self.sim.data.ctrl, ctrlrange[:, 0], ctrlrange[:, 1]) 41 | 42 | def _viewer_setup(self): 43 | body_id = self.sim.model.body_name2id('robot0:palm') 44 | lookat = self.sim.data.body_xpos[body_id] 45 | for idx, value in enumerate(lookat): 46 | self.viewer.cam.lookat[idx] = value 47 | self.viewer.cam.distance = 0.5 48 | self.viewer.cam.azimuth = 55. 49 | self.viewer.cam.elevation = -25. 50 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/inverted_double_pendulum.xml: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /gym/envs/mujoco/pusher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | import mujoco_py 6 | 7 | class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle): 8 | def __init__(self): 9 | utils.EzPickle.__init__(self) 10 | mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', 5) 11 | 12 | def step(self, a): 13 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm") 14 | vec_2 = self.get_body_com("object") - self.get_body_com("goal") 15 | 16 | reward_near = - np.linalg.norm(vec_1) 17 | reward_dist = - np.linalg.norm(vec_2) 18 | reward_ctrl = - np.square(a).sum() 19 | reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near 20 | 21 | self.do_simulation(a, self.frame_skip) 22 | ob = self._get_obs() 23 | done = False 24 | return ob, reward, done, dict(reward_dist=reward_dist, 25 | reward_ctrl=reward_ctrl) 26 | 27 | def viewer_setup(self): 28 | self.viewer.cam.trackbodyid = -1 29 | self.viewer.cam.distance = 4.0 30 | 31 | def reset_model(self): 32 | qpos = self.init_qpos 33 | 34 | self.goal_pos = np.asarray([0, 0]) 35 | while True: 36 | self.cylinder_pos = np.concatenate([ 37 | self.np_random.uniform(low=-0.3, high=0, size=1), 38 | self.np_random.uniform(low=-0.2, high=0.2, size=1)]) 39 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17: 40 | break 41 | 42 | qpos[-4:-2] = self.cylinder_pos 43 | qpos[-2:] = self.goal_pos 44 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005, 45 | high=0.005, size=self.model.nv) 46 | qvel[-4:] = 0 47 | self.set_state(qpos, qvel) 48 | return self._get_obs() 49 | 50 | def _get_obs(self): 51 | return np.concatenate([ 52 | self.sim.data.qpos.flat[:7], 53 | self.sim.data.qvel.flat[:7], 54 | self.get_body_com("tips_arm"), 55 | self.get_body_com("object"), 56 | self.get_body_com("goal"), 57 | ]) 58 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/shared_asset.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 
| 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /gym/utils/atomic_write.py: -------------------------------------------------------------------------------- 1 | # Based on http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python 2 | 3 | import os 4 | from contextlib import contextmanager 5 | 6 | # We would ideally atomically replace any existing file with the new 7 | # version. However, on Windows there's no Python-only solution prior 8 | # to Python 3.3. (This library includes a C extension to do so: 9 | # https://pypi.python.org/pypi/pyosreplace/0.1.) 10 | # 11 | # Correspondingly, we make a best effort, but on Python < 3.3 use a 12 | # replace method which could result in the file temporarily 13 | # disappearing. 14 | import sys 15 | if sys.version_info >= (3, 3): 16 | # Python 3.3 and up have a native `replace` method 17 | from os import replace 18 | elif sys.platform.startswith("win"): 19 | def replace(src, dst): 20 | # TODO: on Windows, this will raise if the file is in use, 21 | # which is possible. We'll need to make this more robust over 22 | # time. 23 | try: 24 | os.remove(dst) 25 | except OSError: 26 | pass 27 | os.rename(src, dst) 28 | else: 29 | # POSIX rename() is always atomic 30 | from os import rename as replace 31 | 32 | @contextmanager 33 | def atomic_write(filepath, binary=False, fsync=False): 34 | """ Writeable file object that atomically updates a file (using a temporary file). In some cases (namely Python < 3.3 on Windows), this could result in an existing file being temporarily unlinked. 35 | 36 | :param filepath: the file path to be opened 37 | :param binary: whether to open the file in a binary mode instead of textual 38 | :param fsync: whether to force write the file to disk 39 | """ 40 | 41 | tmppath = filepath + '~' 42 | while os.path.isfile(tmppath): 43 | tmppath += '~' 44 | try: 45 | with open(tmppath, 'wb' if binary else 'w') as file: 46 | yield file 47 | if fsync: 48 | file.flush() 49 | os.fsync(file.fileno()) 50 | replace(tmppath, filepath) 51 | finally: 52 | try: 53 | os.remove(tmppath) 54 | except (IOError, OSError): 55 | pass 56 | -------------------------------------------------------------------------------- /gym/utils/closer.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import threading 3 | import weakref 4 | 5 | class Closer(object): 6 | """A registry that ensures your objects get closed, whether manually, 7 | upon garbage collection, or upon exit. To work properly, your 8 | objects need to cooperate and do something like the following: 9 | 10 | ``` 11 | closer = Closer() 12 | class Example(object): 13 | def __init__(self): 14 | self._id = closer.register(self) 15 | 16 | def close(self): 17 | # Probably worth making idempotent too! 18 | ... 
19 | closer.unregister(self._id) 20 | 21 | def __del__(self): 22 | self.close() 23 | ``` 24 | 25 | That is, your objects should: 26 | 27 | - register() themselves and save the returned ID 28 | - unregister() themselves upon close() 29 | - include a __del__ method which close()'s the object 30 | """ 31 | 32 | def __init__(self, atexit_register=True): 33 | self.lock = threading.Lock() 34 | self.next_id = -1 35 | self.closeables = weakref.WeakValueDictionary() 36 | 37 | if atexit_register: 38 | atexit.register(self.close) 39 | 40 | def generate_next_id(self): 41 | with self.lock: 42 | self.next_id += 1 43 | return self.next_id 44 | 45 | def register(self, closeable): 46 | """Registers an object with a 'close' method. 47 | 48 | Returns: 49 | int: The registration ID of this object. It is the caller's responsibility to save this ID if early closing is desired. 50 | """ 51 | assert hasattr(closeable, 'close'), 'No close method for {}'.format(closeable) 52 | 53 | next_id = self.generate_next_id() 54 | self.closeables[next_id] = closeable 55 | return next_id 56 | 57 | def unregister(self, id): 58 | assert id is not None 59 | if id in self.closeables: 60 | del self.closeables[id] 61 | 62 | def close(self): 63 | # Explicitly fetch all monitors first so that they can't disappear while 64 | # we iterate. cf. http://stackoverflow.com/a/12429620 65 | closeables = list(self.closeables.values()) 66 | for closeable in closeables: 67 | closeable.close() 68 | -------------------------------------------------------------------------------- /gym/envs/mujoco/humanoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym.envs.mujoco import mujoco_env 3 | from gym import utils 4 | 5 | def mass_center(model, sim): 6 | mass = np.expand_dims(model.body_mass, 1) 7 | xpos = sim.data.xipos 8 | return (np.sum(mass * xpos, 0) / np.sum(mass))[0] 9 | 10 | class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle): 11 | def __init__(self): 12 | mujoco_env.MujocoEnv.__init__(self, 'humanoid.xml', 5) 13 | utils.EzPickle.__init__(self) 14 | 15 | def _get_obs(self): 16 | data = self.sim.data 17 | return np.concatenate([data.qpos.flat[2:], 18 | data.qvel.flat, 19 | data.cinert.flat, 20 | data.cvel.flat, 21 | data.qfrc_actuator.flat, 22 | data.cfrc_ext.flat]) 23 | 24 | def step(self, a): 25 | pos_before = mass_center(self.model, self.sim) 26 | self.do_simulation(a, self.frame_skip) 27 | pos_after = mass_center(self.model, self.sim) 28 | alive_bonus = 5.0 29 | data = self.sim.data 30 | lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep 31 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum() 32 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum() 33 | quad_impact_cost = min(quad_impact_cost, 10) 34 | reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus 35 | qpos = self.sim.data.qpos 36 | done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0)) 37 | return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost) 38 | 39 | def reset_model(self): 40 | c = 0.01 41 | self.set_state( 42 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq), 43 | self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,) 44 | ) 45 | return self._get_obs() 46 | 47 | def viewer_setup(self): 48 | self.viewer.cam.trackbodyid = 1 49 | self.viewer.cam.distance = self.model.stat.extent * 1.0 50 | self.viewer.cam.lookat[2] = 
2.0 51 | self.viewer.cam.elevation = -20 52 | -------------------------------------------------------------------------------- /gym/envs/mujoco/thrower.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | utils.EzPickle.__init__(self) 8 | self._ball_hit_ground = False 9 | self._ball_hit_location = None 10 | mujoco_env.MujocoEnv.__init__(self, 'thrower.xml', 5) 11 | 12 | def step(self, a): 13 | ball_xy = self.get_body_com("ball")[:2] 14 | goal_xy = self.get_body_com("goal")[:2] 15 | 16 | if not self._ball_hit_ground and self.get_body_com("ball")[2] < -0.25: 17 | self._ball_hit_ground = True 18 | self._ball_hit_location = self.get_body_com("ball") 19 | 20 | if self._ball_hit_ground: 21 | ball_hit_xy = self._ball_hit_location[:2] 22 | reward_dist = -np.linalg.norm(ball_hit_xy - goal_xy) 23 | else: 24 | reward_dist = -np.linalg.norm(ball_xy - goal_xy) 25 | reward_ctrl = - np.square(a).sum() 26 | 27 | reward = reward_dist + 0.002 * reward_ctrl 28 | self.do_simulation(a, self.frame_skip) 29 | ob = self._get_obs() 30 | done = False 31 | return ob, reward, done, dict(reward_dist=reward_dist, 32 | reward_ctrl=reward_ctrl) 33 | 34 | def viewer_setup(self): 35 | self.viewer.cam.trackbodyid = 0 36 | self.viewer.cam.distance = 4.0 37 | 38 | def reset_model(self): 39 | self._ball_hit_ground = False 40 | self._ball_hit_location = None 41 | 42 | qpos = self.init_qpos 43 | self.goal = np.array([self.np_random.uniform(low=-0.3, high=0.3), 44 | self.np_random.uniform(low=-0.3, high=0.3)]) 45 | 46 | qpos[-9:-7] = self.goal 47 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005, 48 | high=0.005, size=self.model.nv) 49 | qvel[7:] = 0 50 | self.set_state(qpos, qvel) 51 | return self._get_obs() 52 | 53 | def _get_obs(self): 54 | return np.concatenate([ 55 | self.sim.data.qpos.flat[:7], 56 | self.sim.data.qvel.flat[:7], 57 | self.get_body_com("r_wrist_roll_link"), 58 | self.get_body_com("ball"), 59 | self.get_body_com("goal"), 60 | ]) 61 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/manipulate_pen.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/swimmer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 39 | -------------------------------------------------------------------------------- /gym/envs/toy_text/nchain.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | class NChainEnv(gym.Env): 6 | """n-Chain environment 7 | 8 | This game presents moves along a linear chain of states, with two actions: 9 | 0) forward, which moves along the chain but returns no reward 10 | 1) backward, which returns to the beginning and has a small reward 11 | 12 | The end of the chain, however, presents a large reward, and by moving 13 | 'forward' at the end of the chain this large reward can be repeated. 
14 | 15 | At each action, there is a small probability that the agent 'slips' and the 16 | opposite transition is instead taken. 17 | 18 | The observed state is the current state in the chain (0 to n-1). 19 | 20 | This environment is described in section 6.1 of: 21 | A Bayesian Framework for Reinforcement Learning by Malcolm Strens (2000) 22 | http://ceit.aut.ac.ir/~shiry/lecture/machine-learning/papers/BRL-2000.pdf 23 | """ 24 | def __init__(self, n=5, slip=0.2, small=2, large=10): 25 | self.n = n 26 | self.slip = slip # probability of 'slipping' an action 27 | self.small = small # payout for 'backwards' action 28 | self.large = large # payout at end of chain for 'forwards' action 29 | self.state = 0 # Start at beginning of the chain 30 | self.action_space = spaces.Discrete(2) 31 | self.observation_space = spaces.Discrete(self.n) 32 | self.seed() 33 | 34 | def seed(self, seed=None): 35 | self.np_random, seed = seeding.np_random(seed) 36 | return [seed] 37 | 38 | def step(self, action): 39 | assert self.action_space.contains(action) 40 | if self.np_random.rand() < self.slip: 41 | action = not action # agent slipped, reverse action taken 42 | if action: # 'backwards': go back to the beginning, get small reward 43 | reward = self.small 44 | self.state = 0 45 | elif self.state < self.n - 1: # 'forwards': go up along the chain 46 | reward = 0 47 | self.state += 1 48 | else: # 'forwards': stay at the end of the chain, collect large reward 49 | reward = self.large 50 | done = False 51 | return self.state, reward, done, {} 52 | 53 | def reset(self): 54 | self.state = 0 55 | return self.state 56 | -------------------------------------------------------------------------------- /gym/envs/toy_text/hotter_colder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | from gym import spaces 5 | from gym.utils import seeding 6 | 7 | 8 | class HotterColder(gym.Env): 9 | """Hotter Colder 10 | The goal of hotter colder is to guess closer to a randomly selected number 11 | 12 | After each step the agent receives an observation of: 13 | 0 - No guess yet submitted (only after reset) 14 | 1 - Guess is lower than the target 15 | 2 - Guess is equal to the target 16 | 3 - Guess is higher than the target 17 | 18 | The rewards is calculated as: 19 | (min(action, self.number) + self.range) / (max(action, self.number) + self.range) 20 | 21 | Ideally an agent will be able to recognise the 'scent' of a higher reward and 22 | increase the rate in which is guesses in that direction until the reward reaches 23 | its maximum 24 | """ 25 | def __init__(self): 26 | self.range = 1000 # +/- value the randomly select number can be between 27 | self.bounds = 2000 # Action space bounds 28 | 29 | self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]), 30 | dtype=np.float32) 31 | self.observation_space = spaces.Discrete(4) 32 | 33 | self.number = 0 34 | self.guess_count = 0 35 | self.guess_max = 200 36 | self.observation = 0 37 | 38 | self.seed() 39 | self.reset() 40 | 41 | def seed(self, seed=None): 42 | self.np_random, seed = seeding.np_random(seed) 43 | return [seed] 44 | 45 | def step(self, action): 46 | assert self.action_space.contains(action) 47 | 48 | if action < self.number: 49 | self.observation = 1 50 | 51 | elif action == self.number: 52 | self.observation = 2 53 | 54 | elif action > self.number: 55 | self.observation = 3 56 | 57 | reward = ((min(action, self.number) + self.bounds) / (max(action, self.number) + 
self.bounds)) ** 2 58 | 59 | self.guess_count += 1 60 | done = self.guess_count >= self.guess_max 61 | 62 | return self.observation, reward[0], done, {"number": self.number, "guesses": self.guess_count} 63 | 64 | def reset(self): 65 | self.number = self.np_random.uniform(-self.range, self.range) 66 | self.guess_count = 0 67 | self.observation = 0 68 | return self.observation 69 | -------------------------------------------------------------------------------- /docs/environments.md: -------------------------------------------------------------------------------- 1 | # Environments 2 | 3 | The gym comes prepackaged with many many environments. It's this common API around many environments that makes the gym so great. Here we will list additional environments that do not come prepacked with the gym. Submit another to this list via a pull-request. 4 | 5 | _**NOTICE**: Its possible that in time OpenAI will develop a full fledged repository of supplemental environments. Until then this bit of markdown will suffice._ 6 | 7 | ## PGE: Parallel Game Engine 8 | 9 | PGE is a FOSS 3D engine for AI simulations, and can interoperate with the Gym. Contains environments with modern 3D graphics, and uses Bullet for physics. 10 | 11 | Learn more here: https://github.com/222464/PGE 12 | 13 | ## gym-inventory: Inventory Control Environments 14 | 15 | gym-inventory is a single agent domain featuring discrete state and action spaces that an AI agent might encounter in inventory control problems. 16 | 17 | Learn more here: https://github.com/paulhendricks/gym-inventory 18 | 19 | ## gym-gazebo: training Robots in Gazebo 20 | 21 | gym-gazebo presents an extension of the initial OpenAI gym for robotics using ROS and Gazebo, an advanced 3D modeling and 22 | rendering tool. 23 | 24 | Learn more here: https://github.com/erlerobot/gym-gazebo/ 25 | 26 | ## gym-maze: 2D maze environment 27 | A simple 2D maze environment where an agent finds its way from the start position to the goal. 28 | 29 | Learn more here: https://github.com/tuzzer/gym-maze/ 30 | 31 | ## gym-minigrid: Minimalistic Gridworld Environment 32 | 33 | A minimalistic gridworld environment. Seeks to minimize software dependencies, be easy to extend and deliver good performance for faster training. 34 | 35 | Learn more here: https://github.com/maximecb/gym-minigrid 36 | 37 | ## gym-sokoban: 2D Transportation Puzzles 38 | 39 | The environment consists of transportation puzzles in which the player's goal is to push all boxes on the warehouse's storage locations. 40 | The advantage of the environment is that it generates a new random level every time it is initialized or reset, which prevents over fitting to predefined levels. 41 | 42 | Learn more here: https://github.com/mpSchrader/gym-sokoban 43 | 44 | ## gym-duckietown: Lane-Following Simulator for Duckietown 45 | 46 | A lane-following simulator built for the [Duckietown](http://duckietown.org/) project (small-scale self-driving car course). 
47 | 48 | Learn more here: https://github.com/duckietown/gym-duckietown 49 | -------------------------------------------------------------------------------- /examples/scripts/sim_env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import gym 3 | from gym import spaces, envs 4 | import argparse 5 | import numpy as np 6 | import itertools 7 | import time 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("env") 11 | parser.add_argument("--mode", choices=["noop", "random", "static", "human"], 12 | default="random") 13 | parser.add_argument("--max_steps", type=int, default=0) 14 | parser.add_argument("--fps",type=float) 15 | parser.add_argument("--once", action="store_true") 16 | parser.add_argument("--ignore_done", action="store_true") 17 | args = parser.parse_args() 18 | 19 | env = envs.make(args.env) 20 | ac_space = env.action_space 21 | 22 | fps = args.fps or env.metadata.get('video.frames_per_second') or 100 23 | if args.max_steps == 0: args.max_steps = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps'] 24 | 25 | while True: 26 | env.reset() 27 | env.render(mode='human') 28 | print("Starting a new trajectory") 29 | for t in range(args.max_steps) if args.max_steps else itertools.count(): 30 | done = False 31 | if args.mode == "noop": 32 | if isinstance(ac_space, spaces.Box): 33 | a = np.zeros(ac_space.shape) 34 | elif isinstance(ac_space, spaces.Discrete): 35 | a = 0 36 | else: 37 | raise NotImplementedError("noop not implemented for class {}".format(type(ac_space))) 38 | _, _, done, _ = env.step(a) 39 | time.sleep(1.0/fps) 40 | elif args.mode == "random": 41 | a = ac_space.sample() 42 | _, _, done, _ = env.step(a) 43 | time.sleep(1.0/fps) 44 | elif args.mode == "static": 45 | time.sleep(1.0/fps) 46 | elif args.mode == "human": 47 | a = raw_input("type action from {0,...,%i} and press enter: "%(ac_space.n-1)) 48 | try: 49 | a = int(a) 50 | except ValueError: 51 | print("WARNING: ignoring illegal action '{}'.".format(a)) 52 | a = 0 53 | if a >= ac_space.n: 54 | print("WARNING: ignoring illegal action {}.".format(a)) 55 | a = 0 56 | _, _, done, _ = env.step(a) 57 | 58 | env.render() 59 | if done and not args.ignore_done: break 60 | print("Done after {} steps".format(t+1)) 61 | if args.once: 62 | break 63 | else: 64 | raw_input("Press enter to continue") 65 | -------------------------------------------------------------------------------- /docs/agents.md: -------------------------------------------------------------------------------- 1 | # Agents 2 | 3 | An "agent" describes the method of running an RL algorithm against an environment in the gym. The agent may contain the algorithm itself or simply provide an integration between an algorithm and the gym environments. 4 | 5 | ## RandomAgent 6 | 7 | A sample agent located in this repo at `gym/examples/agents/random_agent.py`. This simple agent leverages the environments ability to produce a random valid action and does so for each step. 8 | 9 | ## cem.py 10 | 11 | A generic Cross-Entropy agent located in this repo at `gym/examples/agents/cem.py`. This agent defaults to 10 iterations of 25 episodes considering the top 20% "elite". 12 | 13 | ## dqn 14 | 15 | This is a very basic DQN (with experience replay) implementation, which uses OpenAI's gym environment and Keras/Theano neural networks. 
[/sherjilozair/dqn](https://github.com/sherjilozair/dqn) 16 | 17 | ## Simple DQN 18 | 19 | Simple, fast and easy to extend DQN implementation using [Neon](https://github.com/NervanaSystems/neon) deep learning library. Comes with out-of-box tools to train, test and visualize models. For details see [this blog post](https://www.nervanasys.com/deep-reinforcement-learning-with-neon/) or check out the [repo](https://github.com/tambetm/simple_dqn). 20 | 21 | ## AgentNet 22 | A library that allows you to develop custom deep/convolutional/recurrent reinforcement learning agent with full integration with Theano/Lasagne. Also contains a toolkit for various reinforcement learning algorithms, policies, memory augmentations, etc. 23 | 24 | - The repo's here: [AgentNet](https://github.com/yandexdataschool/AgentNet) 25 | - [A step-by-step demo for Atari SpaceInvaders ](https://github.com/yandexdataschool/AgentNet/blob/master/examples/Playing%20Atari%20with%20Deep%20Reinforcement%20Learning%20%28OpenAI%20Gym%29.ipynb) 26 | 27 | ## rllab 28 | 29 | a framework for developing and evaluating reinforcement learning algorithms, fully compatible with OpenAI Gym. It includes a wide range of continuous control tasks plus implementations of many algorithms. [/rllab/rllab](https://github.com/rllab/rllab) 30 | 31 | ## [keras-rl](https://github.com/matthiasplappert/keras-rl) 32 | 33 | [keras-rl](https://github.com/matthiasplappert/keras-rl) implements some state-of-the art deep reinforcement learning algorithms. It was built with OpenAI Gym in mind, and also built on top of the deep learning library [Keras](https://keras.io/) and utilises similar design patterns like callbacks and user-definable metrics. 34 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/manipulate_egg.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/reacher.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/manipulate_block.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /examples/agents/keyboard_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | import sys, gym, time 5 | 6 | # 7 | # Test yourself as a learning agent! 
Pass environment name as a command-line argument, for example: 8 | # 9 | # python keyboard_agent.py SpaceInvadersNoFrameskip-v4 10 | # 11 | 12 | env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1]) 13 | 14 | if not hasattr(env.action_space, 'n'): 15 | raise Exception('Keyboard agent only supports discrete action spaces') 16 | ACTIONS = env.action_space.n 17 | SKIP_CONTROL = 0 # Use previous control decision SKIP_CONTROL times, that's how you 18 | # can test what skip is still usable. 19 | 20 | human_agent_action = 0 21 | human_wants_restart = False 22 | human_sets_pause = False 23 | 24 | def key_press(key, mod): 25 | global human_agent_action, human_wants_restart, human_sets_pause 26 | if key==0xff0d: human_wants_restart = True 27 | if key==32: human_sets_pause = not human_sets_pause 28 | a = int( key - ord('0') ) 29 | if a <= 0 or a >= ACTIONS: return 30 | human_agent_action = a 31 | 32 | def key_release(key, mod): 33 | global human_agent_action 34 | a = int( key - ord('0') ) 35 | if a <= 0 or a >= ACTIONS: return 36 | if human_agent_action == a: 37 | human_agent_action = 0 38 | 39 | env.render() 40 | env.unwrapped.viewer.window.on_key_press = key_press 41 | env.unwrapped.viewer.window.on_key_release = key_release 42 | 43 | def rollout(env): 44 | global human_agent_action, human_wants_restart, human_sets_pause 45 | human_wants_restart = False 46 | obser = env.reset() 47 | skip = 0 48 | total_reward = 0 49 | total_timesteps = 0 50 | while 1: 51 | if not skip: 52 | #print("taking action {}".format(human_agent_action)) 53 | a = human_agent_action 54 | total_timesteps += 1 55 | skip = SKIP_CONTROL 56 | else: 57 | skip -= 1 58 | 59 | obser, r, done, info = env.step(a) 60 | if r != 0: 61 | print("reward %0.3f" % r) 62 | total_reward += r 63 | window_still_open = env.render() 64 | if window_still_open==False: return False 65 | if done: break 66 | if human_wants_restart: break 67 | while human_sets_pause: 68 | env.render() 69 | time.sleep(0.1) 70 | time.sleep(0.1) 71 | print("timesteps %i reward %0.2f" % (total_timesteps, total_reward)) 72 | 73 | print("ACTIONS={}".format(ACTIONS)) 74 | print("Press keys 1 2 3 ... 
to take actions 1 2 3 ...") 75 | print("No keys pressed is taking action 0") 76 | 77 | while 1: 78 | window_still_open = rollout(env) 79 | if window_still_open==False: break 80 | 81 | -------------------------------------------------------------------------------- /gym/envs/mujoco/striker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | utils.EzPickle.__init__(self) 8 | self._striked = False 9 | self._min_strike_dist = np.inf 10 | self.strike_threshold = 0.1 11 | mujoco_env.MujocoEnv.__init__(self, 'striker.xml', 5) 12 | 13 | def step(self, a): 14 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm") 15 | vec_2 = self.get_body_com("object") - self.get_body_com("goal") 16 | self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(vec_2)) 17 | 18 | if np.linalg.norm(vec_1) < self.strike_threshold: 19 | self._striked = True 20 | self._strike_pos = self.get_body_com("tips_arm") 21 | 22 | if self._striked: 23 | vec_3 = self.get_body_com("object") - self._strike_pos 24 | reward_near = - np.linalg.norm(vec_3) 25 | else: 26 | reward_near = - np.linalg.norm(vec_1) 27 | 28 | reward_dist = - np.linalg.norm(self._min_strike_dist) 29 | reward_ctrl = - np.square(a).sum() 30 | reward = 3 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near 31 | 32 | self.do_simulation(a, self.frame_skip) 33 | ob = self._get_obs() 34 | done = False 35 | return ob, reward, done, dict(reward_dist=reward_dist, 36 | reward_ctrl=reward_ctrl) 37 | 38 | def viewer_setup(self): 39 | self.viewer.cam.trackbodyid = 0 40 | self.viewer.cam.distance = 4.0 41 | 42 | def reset_model(self): 43 | self._min_strike_dist = np.inf 44 | self._striked = False 45 | self._strike_pos = None 46 | 47 | qpos = self.init_qpos 48 | 49 | self.ball = np.array([0.5, -0.175]) 50 | while True: 51 | self.goal = np.concatenate([ 52 | self.np_random.uniform(low=0.15, high=0.7, size=1), 53 | self.np_random.uniform(low=0.1, high=1.0, size=1)]) 54 | if np.linalg.norm(self.ball - self.goal) > 0.17: 55 | break 56 | 57 | qpos[-9:-7] = [self.ball[1], self.ball[0]] 58 | qpos[-7:-5] = self.goal 59 | diff = self.ball - self.goal 60 | angle = -np.arctan(diff[0] / (diff[1] + 1e-8)) 61 | qpos[-1] = angle / 3.14 62 | qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1, 63 | size=self.model.nv) 64 | qvel[7:] = 0 65 | self.set_state(qpos, qvel) 66 | return self._get_obs() 67 | 68 | def _get_obs(self): 69 | return np.concatenate([ 70 | self.sim.data.qpos.flat[:7], 71 | self.sim.data.qvel.flat[:7], 72 | self.get_body_com("tips_arm"), 73 | self.get_body_com("object"), 74 | self.get_body_com("goal"), 75 | ]) 76 | -------------------------------------------------------------------------------- /examples/scripts/benchmark_runner: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Run all the tasks on a benchmark using a random agent. 4 | # 5 | # This script assumes you have set an OPENAI_GYM_API_KEY environment 6 | # variable. You can find your API key in the web interface: 7 | # https://gym.openai.com/settings/profile. 
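# A typical invocation might look like this (illustrative only; the flags are
# defined by the argument parser below, and Atari7Ram-v0 is just an example id):
#
#   OPENAI_GYM_API_KEY=<your key> ./benchmark_runner -b Atari7Ram-v0 -t /tmp/gym-results -f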
8 | # 9 | import argparse 10 | import logging 11 | import os 12 | import sys 13 | 14 | import gym 15 | # In modules, use `logger = logging.getLogger(__name__)` 16 | from gym import wrappers 17 | from gym.scoreboard.scoring import benchmark_score_from_local 18 | 19 | import openai_benchmark 20 | 21 | logger = logging.getLogger() 22 | 23 | def main(): 24 | parser = argparse.ArgumentParser(description=None) 25 | parser.add_argument('-b', '--benchmark-id', help='id of benchmark to run e.g. Atari7Ram-v0') 26 | parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.') 27 | parser.add_argument('-f', '--force', action='store_true', dest='force', default=False) 28 | parser.add_argument('-t', '--training-dir', default="/tmp/gym-results", help='What directory to upload.') 29 | args = parser.parse_args() 30 | 31 | if args.verbosity == 0: 32 | logger.setLevel(logging.INFO) 33 | elif args.verbosity >= 1: 34 | logger.setLevel(logging.DEBUG) 35 | 36 | benchmark_id = args.benchmark_id 37 | if benchmark_id is None: 38 | logger.info("Must supply a valid benchmark") 39 | return 1 40 | 41 | try: 42 | benchmark = gym.benchmark_spec(benchmark_id) 43 | except Exception: 44 | logger.info("Invalid benchmark") 45 | return 1 46 | 47 | # run benchmark tasks 48 | for task in benchmark.tasks: 49 | logger.info("Running on env: {}".format(task.env_id)) 50 | for trial in range(task.trials): 51 | env = gym.make(task.env_id) 52 | training_dir_name = "{}/{}-{}".format(args.training_dir, task.env_id, trial) 53 | env = wrappers.Monitor(env, training_dir_name, video_callable=False, force=args.force) 54 | env.reset() 55 | for _ in range(task.max_timesteps): 56 | o, r, done, _ = env.step(env.action_space.sample()) 57 | if done: 58 | env.reset() 59 | env.close() 60 | 61 | logger.info("""Computing statistics for this benchmark run... 62 | {{ 63 | score: {score}, 64 | num_envs_solved: {num_envs_solved}, 65 | summed_training_seconds: {summed_training_seconds}, 66 | start_to_finish_seconds: {start_to_finish_seconds}, 67 | }} 68 | 69 | """.rstrip().format(**benchmark_score_from_local(benchmark_id, args.training_dir))) 70 | 71 | logger.info("""Done running, upload results using the following command: 72 | 73 | python -c "import gym; gym.upload('{}', benchmark_id='{}', algorithm_id='(unknown)')" 74 | 75 | """.rstrip().format(args.training_dir, benchmark_id)) 76 | 77 | return 0 78 | 79 | if __name__ == '__main__': 80 | sys.exit(main()) 81 | -------------------------------------------------------------------------------- /gym/envs/robotics/README.md: -------------------------------------------------------------------------------- 1 | # Robotics environments 2 | 3 | Details and documentation on these robotics environments are available in our [blog post](https://blog.openai.com/ingredients-for-robotics-research/), the accompanying [technical report](https://arxiv.org/abs/1802.09464), and the [Gym website](https://gym.openai.com/envs/#robotics). 
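
As a quick orientation, a random-action rollout against one of these environments looks like the minimal sketch below (assuming a working MuJoCo / mujoco_py installation; `FetchReach-v0` is used purely as an example):

```python
import gym

# Minimal sketch: one episode of FetchReach-v0 driven by random actions.
env = gym.make('FetchReach-v0')
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()          # sample from the continuous action space
    obs, reward, done, info = env.step(action)  # step returns (obs, reward, done, info)
env.close()
```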
4 | 5 | If you use these environments, please cite the following paper: 6 | 7 | ``` 8 | @misc{1802.09464, 9 | Author = {Matthias Plappert and Marcin Andrychowicz and Alex Ray and Bob McGrew and Bowen Baker and Glenn Powell and Jonas Schneider and Josh Tobin and Maciek Chociej and Peter Welinder and Vikash Kumar and Wojciech Zaremba}, 10 | Title = {Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research}, 11 | Year = {2018}, 12 | Eprint = {arXiv:1802.09464}, 13 | } 14 | ``` 15 | 16 | ## Fetch environments 17 | 18 | 19 | [FetchReach-v0](https://gym.openai.com/envs/FetchReach-v0/): Fetch has to move its end-effector to the desired goal position. 20 | 21 | 22 | 23 | 24 | [FetchSlide-v0](https://gym.openai.com/envs/FetchSlide-v0/): Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal. 25 | 26 | 27 | 28 | 29 | [FetchPush-v0](https://gym.openai.com/envs/FetchPush-v0/): Fetch has to move a box by pushing it until it reaches a desired goal position. 30 | 31 | 32 | 33 | 34 | [FetchPickAndPlace-v0](https://gym.openai.com/envs/FetchPickAndPlace-v0/): Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table. 35 | 36 | ## Shadow Dexterous Hand environments 37 | 38 | 39 | [HandReach-v0](https://gym.openai.com/envs/HandReach-v0/): ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm. 40 | 41 | 42 | 43 | 44 | [HandManipulateBlock-v0](https://gym.openai.com/envs/HandManipulateBlock-v0/): ShadowHand has to manipulate a block until it achieves a desired goal position and rotation. 45 | 46 | 47 | 48 | 49 | [HandManipulateEgg-v0](https://gym.openai.com/envs/HandManipulateEgg-v0/): ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation. 50 | 51 | 52 | 53 | 54 | [HandManipulatePen-v0](https://gym.openai.com/envs/HandManipulatePen-v0/): ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation. 55 | -------------------------------------------------------------------------------- /gym/envs/tests/test_determinism.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from gym import spaces 4 | from gym.envs.tests.spec_list import spec_list 5 | 6 | @pytest.mark.parametrize("spec", spec_list) 7 | def test_env(spec): 8 | 9 | # Note that this precludes running this test in multiple 10 | # threads. However, we probably already can't do multithreading 11 | # due to some environments. 
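# Seeding the global space-sampling PRNG makes the action_space.sample() calls below
# reproducible, so env1 and env2 are fed identical action sequences.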
12 | spaces.seed(0) 13 | 14 | env1 = spec.make() 15 | env1.seed(0) 16 | action_samples1 = [env1.action_space.sample() for i in range(4)] 17 | initial_observation1 = env1.reset() 18 | step_responses1 = [env1.step(action) for action in action_samples1] 19 | env1.close() 20 | 21 | spaces.seed(0) 22 | 23 | env2 = spec.make() 24 | env2.seed(0) 25 | action_samples2 = [env2.action_space.sample() for i in range(4)] 26 | initial_observation2 = env2.reset() 27 | step_responses2 = [env2.step(action) for action in action_samples2] 28 | env2.close() 29 | 30 | for i, (action_sample1, action_sample2) in enumerate(zip(action_samples1, action_samples2)): 31 | try: 32 | assert_equals(action_sample1, action_sample2) 33 | except AssertionError: 34 | print('env1.action_space=', env1.action_space) 35 | print('env2.action_space=', env2.action_space) 36 | print('action_samples1=', action_samples1) 37 | print('action_samples2=', action_samples2) 38 | print('[{}] action_sample1: {}, action_sample2: {}'.format(i, action_sample1, action_sample2)) 39 | raise 40 | 41 | # Don't check rollout equality if it's a a nondeterministic 42 | # environment. 43 | if spec.nondeterministic: 44 | return 45 | 46 | assert_equals(initial_observation1, initial_observation2) 47 | 48 | for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(zip(step_responses1, step_responses2)): 49 | assert_equals(o1, o2, '[{}] '.format(i)) 50 | assert r1 == r2, '[{}] r1: {}, r2: {}'.format(i, r1, r2) 51 | assert d1 == d2, '[{}] d1: {}, d2: {}'.format(i, d1, d2) 52 | 53 | # Go returns a Pachi game board in info, which doesn't 54 | # properly check equality. For now, we hack around this by 55 | # just skipping Go. 56 | if spec.id not in ['Go9x9-v0', 'Go19x19-v0']: 57 | assert_equals(i1, i2, '[{}] '.format(i)) 58 | 59 | def assert_equals(a, b, prefix=None): 60 | assert type(a) == type(b), "{}Differing types: {} and {}".format(prefix, a, b) 61 | if isinstance(a, dict): 62 | assert list(a.keys()) == list(b.keys()), "{}Key sets differ: {} and {}".format(prefix, a, b) 63 | 64 | for k in a.keys(): 65 | v_a = a[k] 66 | v_b = b[k] 67 | assert_equals(v_a, v_b) 68 | elif isinstance(a, np.ndarray): 69 | np.testing.assert_array_equal(a, b) 70 | elif isinstance(a, tuple): 71 | for elem_from_a, elem_from_b in zip(a, b): 72 | assert_equals(elem_from_a, elem_from_b) 73 | else: 74 | assert a == b 75 | -------------------------------------------------------------------------------- /gym/spaces/tests/test_spaces.py: -------------------------------------------------------------------------------- 1 | import json # note: ujson fails this test due to float equality 2 | from copy import copy 3 | 4 | import numpy as np 5 | import pytest 6 | 7 | from gym.spaces import Tuple, Box, Discrete, MultiDiscrete, MultiBinary, Dict 8 | 9 | 10 | @pytest.mark.parametrize("space", [ 11 | Discrete(3), 12 | Tuple([Discrete(5), Discrete(10)]), 13 | Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]), 14 | Tuple((Discrete(5), Discrete(2), Discrete(2))), 15 | MultiDiscrete([2, 2, 100]), 16 | Dict({"position": Discrete(5), 17 | "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}), 18 | ]) 19 | def test_roundtripping(space): 20 | sample_1 = space.sample() 21 | sample_2 = space.sample() 22 | assert space.contains(sample_1) 23 | assert space.contains(sample_2) 24 | json_rep = space.to_jsonable([sample_1, sample_2]) 25 | 26 | json_roundtripped = json.loads(json.dumps(json_rep)) 27 | 28 | samples_after_roundtrip = 
space.from_jsonable(json_roundtripped) 29 | sample_1_prime, sample_2_prime = samples_after_roundtrip 30 | 31 | s1 = space.to_jsonable([sample_1]) 32 | s1p = space.to_jsonable([sample_1_prime]) 33 | s2 = space.to_jsonable([sample_2]) 34 | s2p = space.to_jsonable([sample_2_prime]) 35 | assert s1 == s1p, "Expected {} to equal {}".format(s1, s1p) 36 | assert s2 == s2p, "Expected {} to equal {}".format(s2, s2p) 37 | 38 | 39 | @pytest.mark.parametrize("space", [ 40 | Discrete(3), 41 | Box(low=np.array([-10, 0]),high=np.array([10, 10])), 42 | Tuple([Discrete(5), Discrete(10)]), 43 | Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]), 44 | Tuple((Discrete(5), Discrete(2), Discrete(2))), 45 | MultiDiscrete([2, 2, 100]), 46 | MultiBinary(6), 47 | Dict({"position": Discrete(5), 48 | "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}), 49 | ]) 50 | def test_equality(space): 51 | space1 = space 52 | space2 = copy(space) 53 | assert space1 == space2, "Expected {} to equal {}".format(space1, space2) 54 | 55 | 56 | @pytest.mark.parametrize("spaces", [ 57 | (Discrete(3), Discrete(4)), 58 | (MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])), 59 | (MultiBinary(8), MultiBinary(7)), 60 | (Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32), 61 | Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32)), 62 | (Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])), 63 | (Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})), 64 | (Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})), 65 | ]) 66 | def test_inequality(spaces): 67 | space1, space2 = spaces 68 | assert space1 != space2, "Expected {} != {}".format(space1, space2) 69 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/hopper.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 49 | -------------------------------------------------------------------------------- /gym/envs/README.md: -------------------------------------------------------------------------------- 1 | # Envs 2 | 3 | These are the core integrated environments. Note that we may later 4 | restructure any of the files, but will keep the environments available 5 | at the relevant package's top-level. So for example, you should access 6 | `AntEnv` as follows: 7 | 8 | ``` 9 | # Will be supported in future releases 10 | from gym.envs import mujoco 11 | mujoco.AntEnv 12 | ``` 13 | 14 | Rather than: 15 | 16 | ``` 17 | # May break in future releases 18 | from gym.envs.mujoco import ant 19 | ant.AntEnv 20 | ``` 21 | 22 | ## How to create new environments for Gym 23 | 24 | * Create a new repo called gym-foo, which should also be a PIP package. 25 | 26 | * A good example is https://github.com/openai/gym-soccer. 
27 | 28 | * It should have at least the following files: 29 | ```sh 30 | gym-foo/ 31 | README.md 32 | setup.py 33 | gym_foo/ 34 | __init__.py 35 | envs/ 36 | __init__.py 37 | foo_env.py 38 | foo_extrahard_env.py 39 | ``` 40 | 41 | * `gym-foo/setup.py` should have: 42 | 43 | ```python 44 | from setuptools import setup 45 | 46 | setup(name='gym_foo', 47 | version='0.0.1', 48 | install_requires=['gym'] # And any other dependencies foo needs 49 | ) 50 | ``` 51 | 52 | * `gym-foo/gym_foo/__init__.py` should have: 53 | ```python 54 | from gym.envs.registration import register 55 | 56 | register( 57 | id='foo-v0', 58 | entry_point='gym_foo.envs:FooEnv', 59 | ) 60 | register( 61 | id='foo-extrahard-v0', 62 | entry_point='gym_foo.envs:FooExtraHardEnv', 63 | ) 64 | ``` 65 | 66 | * `gym-foo/gym_foo/envs/__init__.py` should have: 67 | ```python 68 | from gym_foo.envs.foo_env import FooEnv 69 | from gym_foo.envs.foo_extrahard_env import FooExtraHardEnv 70 | ``` 71 | 72 | * `gym-foo/gym_foo/envs/foo_env.py` should look something like: 73 | ```python 74 | import gym 75 | from gym import error, spaces, utils 76 | from gym.utils import seeding 77 | 78 | class FooEnv(gym.Env): 79 | metadata = {'render.modes': ['human']} 80 | 81 | def __init__(self): 82 | ... 83 | def step(self, action): 84 | ... 85 | def reset(self): 86 | ... 87 | def render(self, mode='human', close=False): 88 | ... 89 | ``` 90 | 91 | ## How to add new environments to Gym, within this repo (not recommended for new environments) 92 | 93 | 1. Write your environment in an existing collection or a new collection. All collections are subfolders of `/gym/envs'. 94 | 2. Import your environment into the `__init__.py` file of the collection. This file will be located at `/gym/envs/my_collection/__init__.py`. Add `from gym.envs.my_collection.my_awesome_env import MyEnv` to this file. 95 | 3. Register your env in `/gym/envs/__init__.py`: 96 | 97 | ``` 98 | register( 99 | id='MyEnv-v0', 100 | entry_point='gym.envs.my_collection:MyEnv', 101 | ) 102 | ``` 103 | 104 | 4. Add your environment to the scoreboard in `/gym/scoreboard/__init__.py`: 105 | 106 | ``` 107 | add_task( 108 | id='MyEnv-v0', 109 | summary="Super cool environment", 110 | group='my_collection', 111 | contributor='mygithubhandle', 112 | ) 113 | ``` 114 | -------------------------------------------------------------------------------- /gym/spaces/dict_space.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from collections import OrderedDict 3 | 4 | class Dict(gym.Space): 5 | """ 6 | A dictionary of simpler spaces. 
7 | 8 | Example usage: 9 | self.observation_space = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)}) 10 | 11 | Example usage [nested]: 12 | self.nested_observation_space = spaces.Dict({ 13 | 'sensors': spaces.Dict({ 14 | 'position': spaces.Box(low=-100, high=100, shape=(3,)), 15 | 'velocity': spaces.Box(low=-1, high=1, shape=(3,)), 16 | 'front_cam': spaces.Tuple(( 17 | spaces.Box(low=0, high=1, shape=(10, 10, 3)), 18 | spaces.Box(low=0, high=1, shape=(10, 10, 3)) 19 | )), 20 | 'rear_cam': spaces.Box(low=0, high=1, shape=(10, 10, 3)), 21 | }), 22 | 'ext_controller': spaces.MultiDiscrete([ [0,4], [0,1], [0,1] ]), 23 | 'inner_state':spaces.Dict({ 24 | 'charge': spaces.Discrete(100), 25 | 'system_checks': spaces.MultiBinary(10), 26 | 'job_status': spaces.Dict({ 27 | 'task': spaces.Discrete(5), 28 | 'progress': spaces.Box(low=0, high=100, shape=()), 29 | }) 30 | }) 31 | }) 32 | """ 33 | def __init__(self, spaces=None, **spaces_kwargs): 34 | assert (spaces is None) or (not spaces_kwargs), 'Use either Dict(spaces=dict(...)) or Dict(foo=x, bar=z)' 35 | if spaces is None: 36 | spaces = spaces_kwargs 37 | if isinstance(spaces, dict) and not isinstance(spaces, OrderedDict): 38 | spaces = OrderedDict(sorted(list(spaces.items()))) 39 | if isinstance(spaces, list): 40 | spaces = OrderedDict(spaces) 41 | self.spaces = spaces 42 | gym.Space.__init__(self, None, None) # None for shape and dtype, since it'll require special handling 43 | 44 | def sample(self): 45 | return OrderedDict([(k, space.sample()) for k, space in self.spaces.items()]) 46 | 47 | def contains(self, x): 48 | if not isinstance(x, dict) or len(x) != len(self.spaces): 49 | return False 50 | for k, space in self.spaces.items(): 51 | if k not in x: 52 | return False 53 | if not space.contains(x[k]): 54 | return False 55 | return True 56 | 57 | def __repr__(self): 58 | return "Dict(" + ", ". join([k + ":" + str(s) for k, s in self.spaces.items()]) + ")" 59 | 60 | def to_jsonable(self, sample_n): 61 | # serialize as dict-repr of vectors 62 | return {key: space.to_jsonable([sample[key] for sample in sample_n]) \ 63 | for key, space in self.spaces.items()} 64 | 65 | def from_jsonable(self, sample_n): 66 | dict_of_list = {} 67 | for key, space in self.spaces.items(): 68 | dict_of_list[key] = space.from_jsonable(sample_n[key]) 69 | ret = [] 70 | for i, _ in enumerate(dict_of_list[key]): 71 | entry = {} 72 | for key, value in dict_of_list.items(): 73 | entry[key] = value[i] 74 | ret.append(entry) 75 | return ret 76 | 77 | def __eq__(self, other): 78 | return self.spaces == other.spaces 79 | -------------------------------------------------------------------------------- /gym/envs/classic_control/pendulum.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | import numpy as np 5 | from os import path 6 | 7 | class PendulumEnv(gym.Env): 8 | metadata = { 9 | 'render.modes' : ['human', 'rgb_array'], 10 | 'video.frames_per_second' : 30 11 | } 12 | 13 | def __init__(self): 14 | self.max_speed=8 15 | self.max_torque=2. 
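# integration timestep in seconds, used by the Euler-style update in step()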
16 | self.dt=.05 17 | self.viewer = None 18 | 19 | high = np.array([1., 1., self.max_speed]) 20 | self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32) 21 | self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32) 22 | 23 | self.seed() 24 | 25 | def seed(self, seed=None): 26 | self.np_random, seed = seeding.np_random(seed) 27 | return [seed] 28 | 29 | def step(self,u): 30 | th, thdot = self.state # th := theta 31 | 32 | g = 10. 33 | m = 1. 34 | l = 1. 35 | dt = self.dt 36 | 37 | u = np.clip(u, -self.max_torque, self.max_torque)[0] 38 | self.last_u = u # for rendering 39 | costs = angle_normalize(th)**2 + .1*thdot**2 + .001*(u**2) 40 | 41 | newthdot = thdot + (-3*g/(2*l) * np.sin(th + np.pi) + 3./(m*l**2)*u) * dt 42 | newth = th + newthdot*dt 43 | newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) #pylint: disable=E1111 44 | 45 | self.state = np.array([newth, newthdot]) 46 | return self._get_obs(), -costs, False, {} 47 | 48 | def reset(self): 49 | high = np.array([np.pi, 1]) 50 | self.state = self.np_random.uniform(low=-high, high=high) 51 | self.last_u = None 52 | return self._get_obs() 53 | 54 | def _get_obs(self): 55 | theta, thetadot = self.state 56 | return np.array([np.cos(theta), np.sin(theta), thetadot]) 57 | 58 | def render(self, mode='human'): 59 | 60 | if self.viewer is None: 61 | from gym.envs.classic_control import rendering 62 | self.viewer = rendering.Viewer(500,500) 63 | self.viewer.set_bounds(-2.2,2.2,-2.2,2.2) 64 | rod = rendering.make_capsule(1, .2) 65 | rod.set_color(.8, .3, .3) 66 | self.pole_transform = rendering.Transform() 67 | rod.add_attr(self.pole_transform) 68 | self.viewer.add_geom(rod) 69 | axle = rendering.make_circle(.05) 70 | axle.set_color(0,0,0) 71 | self.viewer.add_geom(axle) 72 | fname = path.join(path.dirname(__file__), "assets/clockwise.png") 73 | self.img = rendering.Image(fname, 1., 1.) 
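# this arrow image is rescaled by the last applied torque later in render(), visualizing the control input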
74 | self.imgtrans = rendering.Transform() 75 | self.img.add_attr(self.imgtrans) 76 | 77 | self.viewer.add_onetime(self.img) 78 | self.pole_transform.set_rotation(self.state[0] + np.pi/2) 79 | if self.last_u: 80 | self.imgtrans.scale = (-self.last_u/2, np.abs(self.last_u)/2) 81 | 82 | return self.viewer.render(return_rgb_array = mode=='rgb_array') 83 | 84 | def close(self): 85 | if self.viewer: 86 | self.viewer.close() 87 | self.viewer = None 88 | 89 | def angle_normalize(x): 90 | return (((x+np.pi) % (2*np.pi)) - np.pi) 91 | -------------------------------------------------------------------------------- /gym/envs/toy_text/guessing_game.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | from gym import spaces 5 | from gym.utils import seeding 6 | 7 | 8 | class GuessingGame(gym.Env): 9 | """Number guessing game 10 | 11 | The object of the game is to guess within 1% of the randomly chosen number 12 | within 200 time steps 13 | 14 | After each step the agent is provided with one of four possible observations 15 | which indicate where the guess is in relation to the randomly chosen number 16 | 17 | 0 - No guess yet submitted (only after reset) 18 | 1 - Guess is lower than the target 19 | 2 - Guess is equal to the target 20 | 3 - Guess is higher than the target 21 | 22 | The rewards are: 23 | 0 if the agent's guess is outside of 1% of the target 24 | 1 if the agent's guess is inside 1% of the target 25 | 26 | The episode terminates after the agent guesses within 1% of the target or 27 | 200 steps have been taken 28 | 29 | The agent will need to use a memory of previously submitted actions and observations 30 | in order to efficiently explore the available actions 31 | 32 | The purpose is to have agents optimise their exploration parameters (e.g. how far to 33 | explore from previous actions) based on previous experience. Because the goal changes 34 | each episode a state-value or action-value function isn't able to provide any additional 35 | benefit apart from being able to tell whether to increase or decrease the next guess. 
36 | 37 | The perfect agent would likely learn the bounds of the action space (without referring 38 | to them explicitly) and then follow binary tree style exploration towards to goal number 39 | """ 40 | def __init__(self): 41 | self.range = 1000 # Randomly selected number is within +/- this value 42 | self.bounds = 10000 43 | 44 | self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]), 45 | dtype=np.float32) 46 | self.observation_space = spaces.Discrete(4) 47 | 48 | self.number = 0 49 | self.guess_count = 0 50 | self.guess_max = 200 51 | self.observation = 0 52 | 53 | self.seed() 54 | self.reset() 55 | 56 | def seed(self, seed=None): 57 | self.np_random, seed = seeding.np_random(seed) 58 | return [seed] 59 | 60 | def step(self, action): 61 | assert self.action_space.contains(action) 62 | 63 | if action < self.number: 64 | self.observation = 1 65 | 66 | elif action == self.number: 67 | self.observation = 2 68 | 69 | elif action > self.number: 70 | self.observation = 3 71 | 72 | reward = 0 73 | done = False 74 | 75 | if (self.number - self.range * 0.01) < action < (self.number + self.range * 0.01): 76 | reward = 1 77 | done = True 78 | 79 | self.guess_count += 1 80 | if self.guess_count >= self.guess_max: 81 | done = True 82 | 83 | return self.observation, reward, done, {"number": self.number, "guesses": self.guess_count} 84 | 85 | def reset(self): 86 | self.number = self.np_random.uniform(-self.range, self.range) 87 | self.guess_count = 0 88 | self.observation = 0 89 | return self.observation 90 | -------------------------------------------------------------------------------- /gym/envs/tests/test_envs_semantics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Currently disabled since this was done in a very poor way 3 | Hashed str representation of objects 4 | """ 5 | 6 | 7 | from __future__ import unicode_literals 8 | import json 9 | import hashlib 10 | import os 11 | 12 | import pytest 13 | from gym import spaces, logger 14 | from gym.envs.tests.spec_list import spec_list 15 | 16 | DATA_DIR = os.path.dirname(__file__) 17 | ROLLOUT_STEPS = 100 18 | episodes = ROLLOUT_STEPS 19 | steps = ROLLOUT_STEPS 20 | 21 | ROLLOUT_FILE = os.path.join(DATA_DIR, 'rollout.json') 22 | 23 | if not os.path.isfile(ROLLOUT_FILE): 24 | with open(ROLLOUT_FILE, "w") as outfile: 25 | json.dump({}, outfile, indent=2) 26 | 27 | def hash_object(unhashed): 28 | return hashlib.sha256(str(unhashed).encode('utf-16')).hexdigest() # This is really bad, str could be same while values change 29 | 30 | def generate_rollout_hash(spec): 31 | spaces.seed(0) 32 | env = spec.make() 33 | env.seed(0) 34 | 35 | observation_list = [] 36 | action_list = [] 37 | reward_list = [] 38 | done_list = [] 39 | 40 | total_steps = 0 41 | for episode in range(episodes): 42 | if total_steps >= ROLLOUT_STEPS: break 43 | observation = env.reset() 44 | 45 | for step in range(steps): 46 | action = env.action_space.sample() 47 | observation, reward, done, _ = env.step(action) 48 | 49 | action_list.append(action) 50 | observation_list.append(observation) 51 | reward_list.append(reward) 52 | done_list.append(done) 53 | 54 | total_steps += 1 55 | if total_steps >= ROLLOUT_STEPS: break 56 | 57 | if done: break 58 | 59 | observations_hash = hash_object(observation_list) 60 | actions_hash = hash_object(action_list) 61 | rewards_hash = hash_object(reward_list) 62 | dones_hash = hash_object(done_list) 63 | 64 | env.close() 65 | return observations_hash, actions_hash, 
rewards_hash, dones_hash 66 | 67 | @pytest.mark.parametrize("spec", spec_list) 68 | def test_env_semantics(spec): 69 | logger.warn("Skipping this test. Existing hashes were generated in a bad way") 70 | return 71 | with open(ROLLOUT_FILE) as data_file: 72 | rollout_dict = json.load(data_file) 73 | 74 | if spec.id not in rollout_dict: 75 | if not spec.nondeterministic: 76 | logger.warn("Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs".format(spec.id)) 77 | return 78 | 79 | logger.info("Testing rollout for {} environment...".format(spec.id)) 80 | 81 | observations_now, actions_now, rewards_now, dones_now = generate_rollout_hash(spec) 82 | 83 | errors = [] 84 | if rollout_dict[spec.id]['observations'] != observations_now: 85 | errors.append('Observations not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['observations'], observations_now)) 86 | if rollout_dict[spec.id]['actions'] != actions_now: 87 | errors.append('Actions not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['actions'], actions_now)) 88 | if rollout_dict[spec.id]['rewards'] != rewards_now: 89 | errors.append('Rewards not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['rewards'], rewards_now)) 90 | if rollout_dict[spec.id]['dones'] != dones_now: 91 | errors.append('Dones not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['dones'], dones_now)) 92 | if len(errors): 93 | for error in errors: 94 | logger.warn(error) 95 | raise ValueError(errors) 96 | -------------------------------------------------------------------------------- /gym/utils/seeding.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import numpy as np 3 | import os 4 | import random as _random 5 | from six import integer_types 6 | import struct 7 | import sys 8 | 9 | from gym import error 10 | 11 | def np_random(seed=None): 12 | if seed is not None and not (isinstance(seed, integer_types) and 0 <= seed): 13 | raise error.Error('Seed must be a non-negative integer or omitted, not {}'.format(seed)) 14 | 15 | seed = create_seed(seed) 16 | 17 | rng = np.random.RandomState() 18 | rng.seed(_int_list_from_bigint(hash_seed(seed))) 19 | return rng, seed 20 | 21 | def hash_seed(seed=None, max_bytes=8): 22 | """Any given evaluation is likely to have many PRNG's active at 23 | once. (Most commonly, because the environment is running in 24 | multiple processes.) There's literature indicating that having 25 | linear correlations between seeds of multiple PRNG's can correlate 26 | the outputs: 27 | 28 | http://blogs.unity3d.com/2015/01/07/a-primer-on-repeatable-random-numbers/ 29 | http://stackoverflow.com/questions/1554958/how-different-do-random-seeds-need-to-be 30 | http://dl.acm.org/citation.cfm?id=1276928 31 | 32 | Thus, for sanity we hash the seeds before using them. (This scheme 33 | is likely not crypto-strength, but it should be good enough to get 34 | rid of simple correlations.) 35 | 36 | Args: 37 | seed (Optional[int]): None seeds from an operating system specific randomness source. 38 | max_bytes: Maximum number of bytes to use in the hashed seed. 39 | """ 40 | if seed is None: 41 | seed = create_seed(max_bytes=max_bytes) 42 | hash = hashlib.sha512(str(seed).encode('utf8')).digest() 43 | return _bigint_from_bytes(hash[:max_bytes]) 44 | 45 | def create_seed(a=None, max_bytes=8): 46 | """Create a strong random seed. 
Otherwise, Python 2 would seed using 47 | the system time, which might be non-robust especially in the 48 | presence of concurrency. 49 | 50 | Args: 51 | a (Optional[int, str]): None seeds from an operating system specific randomness source. 52 | max_bytes: Maximum number of bytes to use in the seed. 53 | """ 54 | # Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py 55 | if a is None: 56 | a = _bigint_from_bytes(os.urandom(max_bytes)) 57 | elif isinstance(a, str): 58 | a = a.encode('utf8') 59 | a += hashlib.sha512(a).digest() 60 | a = _bigint_from_bytes(a[:max_bytes]) 61 | elif isinstance(a, integer_types): 62 | a = a % 2**(8 * max_bytes) 63 | else: 64 | raise error.Error('Invalid type for seed: {} ({})'.format(type(a), a)) 65 | 66 | return a 67 | 68 | # TODO: don't hardcode sizeof_int here 69 | def _bigint_from_bytes(bytes): 70 | sizeof_int = 4 71 | padding = sizeof_int - len(bytes) % sizeof_int 72 | bytes += b'\0' * padding 73 | int_count = int(len(bytes) / sizeof_int) 74 | unpacked = struct.unpack("{}I".format(int_count), bytes) 75 | accum = 0 76 | for i, val in enumerate(unpacked): 77 | accum += 2 ** (sizeof_int * 8 * i) * val 78 | return accum 79 | 80 | def _int_list_from_bigint(bigint): 81 | # Special case 0 82 | if bigint < 0: 83 | raise error.Error('Seed must be non-negative, not {}'.format(bigint)) 84 | elif bigint == 0: 85 | return [0] 86 | 87 | ints = [] 88 | while bigint > 0: 89 | bigint, mod = divmod(bigint, 2 ** 32) 90 | ints.append(mod) 91 | return ints 92 | -------------------------------------------------------------------------------- /examples/agents/cem.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import gym 4 | from gym import wrappers, logger 5 | import numpy as np 6 | from six.moves import cPickle as pickle 7 | import json, sys, os 8 | from os import path 9 | from _policies import BinaryActionLinearPolicy # Different file so it can be unpickled 10 | import argparse 11 | 12 | def cem(f, th_mean, batch_size, n_iter, elite_frac, initial_std=1.0): 13 | """ 14 | Generic implementation of the cross-entropy method for maximizing a black-box function 15 | 16 | f: a function mapping from vector -> scalar 17 | th_mean: initial mean over input distribution 18 | batch_size: number of samples of theta to evaluate per batch 19 | n_iter: number of batches 20 | elite_frac: each batch, select this fraction of the top-performing samples 21 | initial_std: initial standard deviation over parameter vectors 22 | """ 23 | n_elite = int(np.round(batch_size*elite_frac)) 24 | th_std = np.ones_like(th_mean) * initial_std 25 | 26 | for _ in range(n_iter): 27 | ths = np.array([th_mean + dth for dth in th_std[None,:]*np.random.randn(batch_size, th_mean.size)]) 28 | ys = np.array([f(th) for th in ths]) 29 | elite_inds = ys.argsort()[::-1][:n_elite] 30 | elite_ths = ths[elite_inds] 31 | th_mean = elite_ths.mean(axis=0) 32 | th_std = elite_ths.std(axis=0) 33 | yield {'ys' : ys, 'theta_mean' : th_mean, 'y_mean' : ys.mean()} 34 | 35 | def do_rollout(agent, env, num_steps, render=False): 36 | total_rew = 0 37 | ob = env.reset() 38 | for t in range(num_steps): 39 | a = agent.act(ob) 40 | (ob, reward, done, _info) = env.step(a) 41 | total_rew += reward 42 | if render and t%3==0: env.render() 43 | if done: break 44 | return total_rew, t+1 45 | 46 | if __name__ == '__main__': 47 | logger.set_level(logger.INFO) 48 | 49 | parser = argparse.ArgumentParser() 50 | parser.add_argument('--display', 
action='store_true') 51 | parser.add_argument('target', nargs="?", default="CartPole-v0") 52 | args = parser.parse_args() 53 | 54 | env = gym.make(args.target) 55 | env.seed(0) 56 | np.random.seed(0) 57 | params = dict(n_iter=10, batch_size=25, elite_frac = 0.2) 58 | num_steps = 200 59 | 60 | # You provide the directory to write to (can be an existing 61 | # directory, but can't contain previous monitor results. You can 62 | # also dump to a tempdir if you'd like: tempfile.mkdtemp(). 63 | outdir = '/tmp/cem-agent-results' 64 | env = wrappers.Monitor(env, outdir, force=True) 65 | 66 | # Prepare snapshotting 67 | # ---------------------------------------- 68 | def writefile(fname, s): 69 | with open(path.join(outdir, fname), 'w') as fh: fh.write(s) 70 | info = {} 71 | info['params'] = params 72 | info['argv'] = sys.argv 73 | info['env_id'] = env.spec.id 74 | # ------------------------------------------ 75 | 76 | def noisy_evaluation(theta): 77 | agent = BinaryActionLinearPolicy(theta) 78 | rew, T = do_rollout(agent, env, num_steps) 79 | return rew 80 | 81 | # Train the agent, and snapshot each stage 82 | for (i, iterdata) in enumerate( 83 | cem(noisy_evaluation, np.zeros(env.observation_space.shape[0]+1), **params)): 84 | print('Iteration %2i. Episode mean reward: %7.3f'%(i, iterdata['y_mean'])) 85 | agent = BinaryActionLinearPolicy(iterdata['theta_mean']) 86 | if args.display: do_rollout(agent, env, 200, render=True) 87 | writefile('agent-%.4i.pkl'%i, str(pickle.dumps(agent, -1))) 88 | 89 | # Write out the env at the end so we store the parameters of this 90 | # environment. 91 | writefile('info.json', json.dumps(info)) 92 | 93 | env.close() 94 | -------------------------------------------------------------------------------- /gym/error.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | class Error(Exception): 4 | pass 5 | 6 | # Local errors 7 | 8 | class Unregistered(Error): 9 | """Raised when the user requests an item from the registry that does 10 | not actually exist. 11 | """ 12 | pass 13 | 14 | class UnregisteredEnv(Unregistered): 15 | """Raised when the user requests an env from the registry that does 16 | not actually exist. 17 | """ 18 | pass 19 | 20 | class UnregisteredBenchmark(Unregistered): 21 | """Raised when the user requests an env from the registry that does 22 | not actually exist. 23 | """ 24 | pass 25 | 26 | class DeprecatedEnv(Error): 27 | """Raised when the user requests an env from the registry with an 28 | older version number than the latest env with the same name. 29 | """ 30 | pass 31 | 32 | class UnseedableEnv(Error): 33 | """Raised when the user tries to seed an env that does not support 34 | seeding. 35 | """ 36 | pass 37 | 38 | class DependencyNotInstalled(Error): 39 | pass 40 | 41 | class UnsupportedMode(Exception): 42 | """Raised when the user requests a rendering mode not supported by the 43 | environment. 44 | """ 45 | pass 46 | 47 | class ResetNeeded(Exception): 48 | """When the monitor is active, raised when the user tries to step an 49 | environment that's already done. 50 | """ 51 | pass 52 | 53 | class ResetNotAllowed(Exception): 54 | """When the monitor is active, raised when the user tries to step an 55 | environment that's not yet done. 
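In practice this means reset() was called while an episode was still in progress.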
56 | """ 57 | pass 58 | 59 | class InvalidAction(Exception): 60 | """Raised when the user performs an action not contained within the 61 | action space 62 | """ 63 | pass 64 | 65 | # API errors 66 | 67 | class APIError(Error): 68 | def __init__(self, message=None, http_body=None, http_status=None, 69 | json_body=None, headers=None): 70 | super(APIError, self).__init__(message) 71 | 72 | if http_body and hasattr(http_body, 'decode'): 73 | try: 74 | http_body = http_body.decode('utf-8') 75 | except: 76 | http_body = ('') 78 | 79 | self._message = message 80 | self.http_body = http_body 81 | self.http_status = http_status 82 | self.json_body = json_body 83 | self.headers = headers or {} 84 | self.request_id = self.headers.get('request-id', None) 85 | 86 | def __unicode__(self): 87 | if self.request_id is not None: 88 | msg = self._message or "" 89 | return u"Request {0}: {1}".format(self.request_id, msg) 90 | else: 91 | return self._message 92 | 93 | def __str__(self): 94 | try: # Python 2 95 | return unicode(self).encode('utf-8') 96 | except NameError: # Python 3 97 | return self.__unicode__() 98 | 99 | 100 | class APIConnectionError(APIError): 101 | pass 102 | 103 | 104 | class InvalidRequestError(APIError): 105 | 106 | def __init__(self, message, param, http_body=None, 107 | http_status=None, json_body=None, headers=None): 108 | super(InvalidRequestError, self).__init__( 109 | message, http_body, http_status, json_body, 110 | headers) 111 | self.param = param 112 | 113 | 114 | class AuthenticationError(APIError): 115 | pass 116 | 117 | class RateLimitError(APIError): 118 | pass 119 | 120 | # Video errors 121 | 122 | class VideoRecorderError(Error): 123 | pass 124 | 125 | class InvalidFrame(Error): 126 | pass 127 | 128 | # Wrapper errors 129 | 130 | class DoubleWrapperError(Error): 131 | pass 132 | 133 | 134 | class WrapAfterConfigureError(Error): 135 | pass 136 | 137 | 138 | class RetriesExceededError(Error): 139 | pass 140 | -------------------------------------------------------------------------------- /gym/envs/robotics/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym import error 4 | try: 5 | import mujoco_py 6 | except ImportError as e: 7 | raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e)) 8 | 9 | 10 | def robot_get_obs(sim): 11 | """Returns all joint positions and velocities associated with 12 | a robot. 13 | """ 14 | if sim.data.qpos is not None and sim.model.joint_names: 15 | names = [n for n in sim.model.joint_names if n.startswith('robot')] 16 | return ( 17 | np.array([sim.data.get_joint_qpos(name) for name in names]), 18 | np.array([sim.data.get_joint_qvel(name) for name in names]), 19 | ) 20 | return np.zeros(0), np.zeros(0) 21 | 22 | 23 | def ctrl_set_action(sim, action): 24 | """For torque actuators it copies the action into mujoco ctrl field. 25 | For position actuators it sets the target relative to the current qpos. 
26 | """ 27 | if sim.model.nmocap > 0: 28 | _, action = np.split(action, (sim.model.nmocap * 7, )) 29 | if sim.data.ctrl is not None: 30 | for i in range(action.shape[0]): 31 | if sim.model.actuator_biastype[i] == 0: 32 | sim.data.ctrl[i] = action[i] 33 | else: 34 | idx = sim.model.jnt_qposadr[sim.model.actuator_trnid[i, 0]] 35 | sim.data.ctrl[i] = sim.data.qpos[idx] + action[i] 36 | 37 | 38 | def mocap_set_action(sim, action): 39 | """The action controls the robot using mocaps. Specifically, bodies 40 | on the robot (for example the gripper wrist) is controlled with 41 | mocap bodies. In this case the action is the desired difference 42 | in position and orientation (quaternion), in world coordinates, 43 | of the of the target body. The mocap is positioned relative to 44 | the target body according to the delta, and the MuJoCo equality 45 | constraint optimizer tries to center the welded body on the mocap. 46 | """ 47 | if sim.model.nmocap > 0: 48 | action, _ = np.split(action, (sim.model.nmocap * 7, )) 49 | action = action.reshape(sim.model.nmocap, 7) 50 | 51 | pos_delta = action[:, :3] 52 | quat_delta = action[:, 3:] 53 | 54 | reset_mocap2body_xpos(sim) 55 | sim.data.mocap_pos[:] = sim.data.mocap_pos + pos_delta 56 | sim.data.mocap_quat[:] = sim.data.mocap_quat + quat_delta 57 | 58 | 59 | def reset_mocap_welds(sim): 60 | """Resets the mocap welds that we use for actuation. 61 | """ 62 | if sim.model.nmocap > 0 and sim.model.eq_data is not None: 63 | for i in range(sim.model.eq_data.shape[0]): 64 | if sim.model.eq_type[i] == mujoco_py.const.EQ_WELD: 65 | sim.model.eq_data[i, :] = np.array( 66 | [0., 0., 0., 1., 0., 0., 0.]) 67 | sim.forward() 68 | 69 | 70 | def reset_mocap2body_xpos(sim): 71 | """Resets the position and orientation of the mocap bodies to the same 72 | values as the bodies they're welded to. 
73 | """ 74 | 75 | if (sim.model.eq_type is None or 76 | sim.model.eq_obj1id is None or 77 | sim.model.eq_obj2id is None): 78 | return 79 | for eq_type, obj1_id, obj2_id in zip(sim.model.eq_type, 80 | sim.model.eq_obj1id, 81 | sim.model.eq_obj2id): 82 | if eq_type != mujoco_py.const.EQ_WELD: 83 | continue 84 | 85 | mocap_id = sim.model.body_mocapid[obj1_id] 86 | if mocap_id != -1: 87 | # obj1 is the mocap, obj2 is the welded body 88 | body_idx = obj2_id 89 | else: 90 | # obj2 is the mocap, obj1 is the welded body 91 | mocap_id = sim.model.body_mocapid[obj2_id] 92 | body_idx = obj1_id 93 | 94 | assert (mocap_id != -1) 95 | sim.data.mocap_pos[mocap_id][:] = sim.data.body_xpos[body_idx] 96 | sim.data.mocap_quat[mocap_id][:] = sim.data.body_xquat[body_idx] 97 | -------------------------------------------------------------------------------- /gym/wrappers/monitoring/stats_recorder.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import time 4 | 5 | from gym import error 6 | from gym.utils import atomic_write 7 | from gym.utils.json_utils import json_encode_np 8 | 9 | class StatsRecorder(object): 10 | def __init__(self, directory, file_prefix, autoreset=False, env_id=None): 11 | self.autoreset = autoreset 12 | self.env_id = env_id 13 | 14 | self.initial_reset_timestamp = None 15 | self.directory = directory 16 | self.file_prefix = file_prefix 17 | self.episode_lengths = [] 18 | self.episode_rewards = [] 19 | self.episode_types = [] # experimental addition 20 | self._type = 't' 21 | self.timestamps = [] 22 | self.steps = None 23 | self.total_steps = 0 24 | self.rewards = None 25 | 26 | self.done = None 27 | self.closed = False 28 | 29 | filename = '{}.stats.json'.format(self.file_prefix) 30 | self.path = os.path.join(self.directory, filename) 31 | 32 | @property 33 | def type(self): 34 | return self._type 35 | 36 | @type.setter 37 | def type(self, type): 38 | if type not in ['t', 'e']: 39 | raise error.Error('Invalid episode type {}: must be t for training or e for evaluation', type) 40 | self._type = type 41 | 42 | def before_step(self, action): 43 | assert not self.closed 44 | 45 | if self.done: 46 | raise error.ResetNeeded("Trying to step environment which is currently done. While the monitor is active for {}, you cannot step beyond the end of an episode. Call 'env.reset()' to start the next episode.".format(self.env_id)) 47 | elif self.steps is None: 48 | raise error.ResetNeeded("Trying to step an environment before reset. While the monitor is active for {}, you must call 'env.reset()' before taking an initial step.".format(self.env_id)) 49 | 50 | def after_step(self, observation, reward, done, info): 51 | self.steps += 1 52 | self.total_steps += 1 53 | self.rewards += reward 54 | self.done = done 55 | 56 | if done: 57 | self.save_complete() 58 | 59 | if done: 60 | if self.autoreset: 61 | self.before_reset() 62 | self.after_reset(observation) 63 | 64 | def before_reset(self): 65 | assert not self.closed 66 | 67 | if self.done is not None and not self.done and self.steps > 0: 68 | raise error.Error("Tried to reset environment which is not done. While the monitor is active for {}, you cannot call reset() unless the episode is over.".format(self.env_id)) 69 | 70 | self.done = False 71 | if self.initial_reset_timestamp is None: 72 | self.initial_reset_timestamp = time.time() 73 | 74 | def after_reset(self, observation): 75 | self.steps = 0 76 | self.rewards = 0 77 | # We write the type at the beginning of the episode. 
--------------------------------------------------------------------------------
/scripts/generate_json.py:
--------------------------------------------------------------------------------
from __future__ import unicode_literals
from gym import envs, spaces, logger
import json
import os
import sys
import argparse

from gym.envs.tests.spec_list import should_skip_env_spec_for_tests
from gym.envs.tests.test_envs_semantics import generate_rollout_hash, hash_object

DATA_DIR = os.path.join(os.path.dirname(__file__), os.pardir, 'gym', 'envs', 'tests')
ROLLOUT_STEPS = 100
episodes = ROLLOUT_STEPS
steps = ROLLOUT_STEPS

ROLLOUT_FILE = os.path.join(DATA_DIR, 'rollout.json')

if not os.path.isfile(ROLLOUT_FILE):
    logger.info("No rollout file found. Writing empty json file to {}".format(ROLLOUT_FILE))
    with open(ROLLOUT_FILE, "w") as outfile:
        json.dump({}, outfile, indent=2)

def update_rollout_dict(spec, rollout_dict):
    """
    Takes as input the environment spec for which the rollout is to be generated,
    and the existing dictionary of rollouts. Returns True iff the dictionary was
    modified.
    """
    # Skip platform-dependent environments
    if should_skip_env_spec_for_tests(spec):
        logger.info("Skipping tests for {}".format(spec.id))
        return False

    # Skip environments that are nondeterministic
    if spec.nondeterministic:
        logger.info("Skipping tests for nondeterministic env {}".format(spec.id))
        return False

    logger.info("Generating rollout for {}".format(spec.id))

    try:
        observations_hash, actions_hash, rewards_hash, dones_hash = generate_rollout_hash(spec)
    except:
        # If running the env generates an exception, don't write to the rollout file
        logger.warn("Exception {} thrown while generating rollout for {}. Rollout not added.".format(sys.exc_info()[0], spec.id))
        return False

    rollout = {}
    rollout['observations'] = observations_hash
    rollout['actions'] = actions_hash
    rollout['rewards'] = rewards_hash
    rollout['dones'] = dones_hash

    existing = rollout_dict.get(spec.id)
    if existing:
        differs = False
        for key, new_hash in rollout.items():
            differs = differs or existing[key] != new_hash
        if not differs:
            logger.debug("Hashes match with existing for {}".format(spec.id))
            return False
        else:
            logger.warn("Got new hash for {}. Overwriting.".format(spec.id))

    rollout_dict[spec.id] = rollout
    return True

def add_new_rollouts(spec_ids, overwrite):
    environments = [spec for spec in envs.registry.all() if spec._entry_point is not None]
    if spec_ids:
        environments = [spec for spec in environments if spec.id in spec_ids]
        assert len(environments) == len(spec_ids), "Some specs not found"
    with open(ROLLOUT_FILE) as data_file:
        rollout_dict = json.load(data_file)
    modified = False
    for spec in environments:
        if not overwrite and spec.id in rollout_dict:
            logger.debug("Rollout already exists for {}. Skipping.".format(spec.id))
        else:
            modified = update_rollout_dict(spec, rollout_dict) or modified

    if modified:
        logger.info("Writing new rollout file to {}".format(ROLLOUT_FILE))
        with open(ROLLOUT_FILE, "w") as outfile:
            json.dump(rollout_dict, outfile, indent=2, sort_keys=True)
    else:
        logger.info("No modifications needed.")

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--force', action='store_true',
                        help='Overwrite existing rollouts if hashes differ.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('specs', nargs='*', help='ids of env specs to check (default: all)')
    args = parser.parse_args()
    if args.verbose:
        logger.set_level(logger.INFO)
    add_new_rollouts(args.specs, args.force)
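The script is normally invoked from the command line; a rough Python equivalent is sketched below (illustrative only, not part of the repository). It assumes the scripts directory is on sys.path and that the listed spec ids exist in the local registry.

from gym import logger
import generate_json  # scripts/generate_json.py

logger.set_level(logger.INFO)
# Same effect as: python scripts/generate_json.py -f FrozenLake-v0 Taxi-v2
generate_json.add_new_rollouts(['FrozenLake-v0', 'Taxi-v2'], overwrite=True)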
Overwriting.".format(spec.id)) 64 | 65 | rollout_dict[spec.id] = rollout 66 | return True 67 | 68 | def add_new_rollouts(spec_ids, overwrite): 69 | environments = [spec for spec in envs.registry.all() if spec._entry_point is not None] 70 | if spec_ids: 71 | environments = [spec for spec in environments if spec.id in spec_ids] 72 | assert len(environments) == len(spec_ids), "Some specs not found" 73 | with open(ROLLOUT_FILE) as data_file: 74 | rollout_dict = json.load(data_file) 75 | modified = False 76 | for spec in environments: 77 | if not overwrite and spec.id in rollout_dict: 78 | logger.debug("Rollout already exists for {}. Skipping.".format(spec.id)) 79 | else: 80 | modified = update_rollout_dict(spec, rollout_dict) or modified 81 | 82 | if modified: 83 | logger.info("Writing new rollout file to {}".format(ROLLOUT_FILE)) 84 | with open(ROLLOUT_FILE, "w") as outfile: 85 | json.dump(rollout_dict, outfile, indent=2, sort_keys=True) 86 | else: 87 | logger.info("No modifications needed.") 88 | 89 | if __name__ == '__main__': 90 | parser = argparse.ArgumentParser() 91 | parser.add_argument('-f', '--force', action='store_true', help='Overwrite '+ 92 | 'existing rollouts if hashes differ.') 93 | parser.add_argument('-v', '--verbose', action='store_true') 94 | parser.add_argument('specs', nargs='*', help='ids of env specs to check (default: all)') 95 | args = parser.parse_args() 96 | if args.verbose: 97 | logger.set_level(logger.INFO) 98 | add_new_rollouts(args.specs, args.force) 99 | -------------------------------------------------------------------------------- /gym/envs/toy_text/cliffwalking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | from gym.envs.toy_text import discrete 4 | 5 | UP = 0 6 | RIGHT = 1 7 | DOWN = 2 8 | LEFT = 3 9 | 10 | 11 | class CliffWalkingEnv(discrete.DiscreteEnv): 12 | """ 13 | This is a simple implementation of the Gridworld Cliff 14 | reinforcement learning task. 15 | 16 | Adapted from Example 6.6 (page 132) from Reinforcement Learning: An Introduction 17 | by Sutton and Barto: 18 | http://incompleteideas.net/book/the-book-2nd.html 19 | 20 | With inspiration from: 21 | https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/cliff_walking.py 22 | 23 | The board is a 4x12 matrix, with (using Numpy matrix indexing): 24 | [3, 0] as the start at bottom-left 25 | [3, 11] as the goal at bottom-right 26 | [3, 1..10] as the cliff at bottom-center 27 | 28 | Each time step incurs -1 reward, and stepping into the cliff incurs -100 reward 29 | and a reset to the start. An episode terminates when the agent reaches the goal. 
30 | """ 31 | metadata = {'render.modes': ['human', 'ansi']} 32 | 33 | def __init__(self): 34 | self.shape = (4, 12) 35 | self.start_state_index = np.ravel_multi_index((3, 0), self.shape) 36 | 37 | nS = np.prod(self.shape) 38 | nA = 4 39 | 40 | # Cliff Location 41 | self._cliff = np.zeros(self.shape, dtype=np.bool) 42 | self._cliff[3, 1:-1] = True 43 | 44 | # Calculate transition probabilities and rewards 45 | P = {} 46 | for s in range(nS): 47 | position = np.unravel_index(s, self.shape) 48 | P[s] = {a: [] for a in range(nA)} 49 | P[s][UP] = self._calculate_transition_prob(position, [-1, 0]) 50 | P[s][RIGHT] = self._calculate_transition_prob(position, [0, 1]) 51 | P[s][DOWN] = self._calculate_transition_prob(position, [1, 0]) 52 | P[s][LEFT] = self._calculate_transition_prob(position, [0, -1]) 53 | 54 | # Calculate initial state distribution 55 | # We always start in state (3, 0) 56 | isd = np.zeros(nS) 57 | isd[self.start_state_index] = 1.0 58 | 59 | super(CliffWalkingEnv, self).__init__(nS, nA, P, isd) 60 | 61 | def _limit_coordinates(self, coord): 62 | """ 63 | Prevent the agent from falling out of the grid world 64 | :param coord: 65 | :return: 66 | """ 67 | coord[0] = min(coord[0], self.shape[0] - 1) 68 | coord[0] = max(coord[0], 0) 69 | coord[1] = min(coord[1], self.shape[1] - 1) 70 | coord[1] = max(coord[1], 0) 71 | return coord 72 | 73 | def _calculate_transition_prob(self, current, delta): 74 | """ 75 | Determine the outcome for an action. Transition Prob is always 1.0. 76 | :param current: Current position on the grid as (row, col) 77 | :param delta: Change in position for transition 78 | :return: (1.0, new_state, reward, done) 79 | """ 80 | new_position = np.array(current) + np.array(delta) 81 | new_position = self._limit_coordinates(new_position).astype(int) 82 | new_state = np.ravel_multi_index(tuple(new_position), self.shape) 83 | if self._cliff[tuple(new_position)]: 84 | return [(1.0, self.start_state_index, -100, False)] 85 | 86 | terminal_state = (self.shape[0] - 1, self.shape[1] - 1) 87 | is_done = tuple(new_position) == terminal_state 88 | return [(1.0, new_state, -1, is_done)] 89 | 90 | def render(self, mode='human'): 91 | outfile = sys.stdout 92 | 93 | for s in range(self.nS): 94 | position = np.unravel_index(s, self.shape) 95 | if self.s == s: 96 | output = " x " 97 | # Print terminal state 98 | elif position == (3, 11): 99 | output = " T " 100 | elif self._cliff[position]: 101 | output = " C " 102 | else: 103 | output = " o " 104 | 105 | if position[1] == 0: 106 | output = output.lstrip() 107 | if position[1] == self.shape[1] - 1: 108 | output = output.rstrip() 109 | output += '\n' 110 | 111 | outfile.write(output) 112 | outfile.write('\n') 113 | 114 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/walker2d.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 63 | -------------------------------------------------------------------------------- /gym/envs/toy_text/blackjack.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | 5 | def cmp(a, b): 6 | return float(a > b) - float(a < b) 7 | 8 | # 1 = Ace, 2-10 = Number cards, Jack/Queen/King = 10 9 | deck = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10] 10 | 11 | 12 | def draw_card(np_random): 13 | return int(np_random.choice(deck)) 14 | 15 | 16 | def draw_hand(np_random): 17 | return 

def usable_ace(hand):  # Does this hand have a usable ace?
    return 1 in hand and sum(hand) + 10 <= 21


def sum_hand(hand):  # Return current hand total
    if usable_ace(hand):
        return sum(hand) + 10
    return sum(hand)


def is_bust(hand):  # Is this hand a bust?
    return sum_hand(hand) > 21


def score(hand):  # What is the score of this hand (0 if bust)
    return 0 if is_bust(hand) else sum_hand(hand)


def is_natural(hand):  # Is this hand a natural blackjack?
    return sorted(hand) == [1, 10]


class BlackjackEnv(gym.Env):
    """Simple blackjack environment

    Blackjack is a card game where the goal is to obtain cards that sum to as
    near as possible to 21 without going over. The player plays against a fixed
    dealer.
    Face cards (Jack, Queen, King) have point value 10.
    Aces can count as either 11 or 1, and an ace is called 'usable' when
    counted as 11.
    This game is played with an infinite deck (i.e. cards are drawn with
    replacement).
    The game starts with the player and the dealer each having one face-up and
    one face-down card.

    The player can request additional cards (hit=1) until they decide to stop
    (stick=0) or exceed 21 (bust).

    After the player sticks, the dealer reveals their face-down card, and draws
    until their sum is 17 or greater. If the dealer goes bust the player wins.

    If neither the player nor the dealer busts, the outcome (win, lose, draw) is
    decided by whose sum is closer to 21. The reward for winning is +1,
    drawing is 0, and losing is -1.

    The observation is a 3-tuple of: the player's current sum,
    the dealer's one showing card (1-10 where 1 is ace),
    and whether or not the player holds a usable ace (0 or 1).

    This environment corresponds to the version of the blackjack problem
    described in Example 5.1 in Reinforcement Learning: An Introduction
    by Sutton and Barto.
    http://incompleteideas.net/book/the-book-2nd.html
    """
    def __init__(self, natural=False):
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Tuple((
            spaces.Discrete(32),
            spaces.Discrete(11),
            spaces.Discrete(2)))
        self.seed()

        # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
        # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
        self.natural = natural
        # Start the first game
        self.reset()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        assert self.action_space.contains(action)
        if action:  # hit: add a card to the player's hand and return
            self.player.append(draw_card(self.np_random))
            if is_bust(self.player):
                done = True
                reward = -1
            else:
                done = False
                reward = 0
        else:  # stick: play out the dealer's hand, and score
            done = True
            while sum_hand(self.dealer) < 17:
                self.dealer.append(draw_card(self.np_random))
            reward = cmp(score(self.player), score(self.dealer))
            if self.natural and is_natural(self.player) and reward == 1:
                reward = 1.5
        return self._get_obs(), reward, done, {}

    def _get_obs(self):
        return (sum_hand(self.player), self.dealer[0], usable_ace(self.player))

    def reset(self):
        self.dealer = draw_hand(self.np_random)
        self.player = draw_hand(self.np_random)
        return self._get_obs()
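An illustrative sketch (not part of the repository): one hand of BlackjackEnv under the stick-on-20-or-21 policy from Sutton & Barto's Example 5.1, instantiating the env directly rather than through gym.make.

from gym.envs.toy_text.blackjack import BlackjackEnv

env = BlackjackEnv(natural=False)
obs = env.reset()                         # (player_sum, dealer_card, usable_ace)
done = False
while not done:
    player_sum, dealer_card, usable_ace = obs
    action = 1 if player_sum < 20 else 0  # 1 = hit, 0 = stick
    obs, reward, done, _ = env.step(action)
print('final obs:', obs, 'reward:', reward)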
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/shared.xml:
--------------------------------------------------------------------------------
[XML model definition; markup not preserved in this export]
--------------------------------------------------------------------------------