├── .dockerignore
├── gym
│ ├── envs
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ ├── spec_list.py
│ │ │ ├── test_registration.py
│ │ │ ├── test_envs.py
│ │ │ ├── test_determinism.py
│ │ │ └── test_envs_semantics.py
│ │ ├── algorithmic
│ │ │ ├── tests
│ │ │ │ └── __init__.py
│ │ │ ├── __init__.py
│ │ │ ├── copy_.py
│ │ │ ├── reverse.py
│ │ │ ├── repeat_copy.py
│ │ │ ├── duplicated_input.py
│ │ │ └── reversed_addition.py
│ │ ├── robotics
│ │ │ ├── assets
│ │ │ │ ├── stls
│ │ │ │ │ ├── .get
│ │ │ │ │ ├── hand
│ │ │ │ │ │ ├── F1.stl
│ │ │ │ │ │ ├── F2.stl
│ │ │ │ │ │ ├── F3.stl
│ │ │ │ │ │ ├── palm.stl
│ │ │ │ │ │ ├── TH1_z.stl
│ │ │ │ │ │ ├── TH2_z.stl
│ │ │ │ │ │ ├── TH3_z.stl
│ │ │ │ │ │ ├── knuckle.stl
│ │ │ │ │ │ ├── wrist.stl
│ │ │ │ │ │ ├── lfmetacarpal.stl
│ │ │ │ │ │ ├── forearm_electric.stl
│ │ │ │ │ │ └── forearm_electric_cvx.stl
│ │ │ │ │ └── fetch
│ │ │ │ │ │ ├── estop_link.stl
│ │ │ │ │ │ ├── laser_link.stl
│ │ │ │ │ │ ├── gripper_link.stl
│ │ │ │ │ │ ├── torso_fixed_link.stl
│ │ │ │ │ │ ├── base_link_collision.stl
│ │ │ │ │ │ ├── bellows_link_collision.stl
│ │ │ │ │ │ ├── head_pan_link_collision.stl
│ │ │ │ │ │ ├── l_wheel_link_collision.stl
│ │ │ │ │ │ ├── r_wheel_link_collision.stl
│ │ │ │ │ │ ├── elbow_flex_link_collision.stl
│ │ │ │ │ │ ├── head_tilt_link_collision.stl
│ │ │ │ │ │ ├── torso_lift_link_collision.stl
│ │ │ │ │ │ ├── wrist_flex_link_collision.stl
│ │ │ │ │ │ ├── wrist_roll_link_collision.stl
│ │ │ │ │ │ ├── forearm_roll_link_collision.stl
│ │ │ │ │ │ ├── shoulder_lift_link_collision.stl
│ │ │ │ │ │ ├── shoulder_pan_link_collision.stl
│ │ │ │ │ │ └── upperarm_roll_link_collision.stl
│ │ │ │ ├── textures
│ │ │ │ │ ├── block.png
│ │ │ │ │ └── block_hidden.png
│ │ │ │ ├── fetch
│ │ │ │ │ ├── reach.xml
│ │ │ │ │ ├── push.xml
│ │ │ │ │ ├── slide.xml
│ │ │ │ │ ├── pick_and_place.xml
│ │ │ │ │ └── shared.xml
│ │ │ │ └── hand
│ │ │ │ │ ├── reach.xml
│ │ │ │ │ ├── shared_asset.xml
│ │ │ │ │ ├── manipulate_pen.xml
│ │ │ │ │ ├── manipulate_egg.xml
│ │ │ │ │ └── manipulate_block.xml
│ │ │ ├── fetch
│ │ │ │ ├── __init__.py
│ │ │ │ ├── reach.py
│ │ │ │ ├── push.py
│ │ │ │ ├── pick_and_place.py
│ │ │ │ └── slide.py
│ │ │ ├── hand
│ │ │ │ └── __init__.py
│ │ │ ├── __init__.py
│ │ │ ├── hand_env.py
│ │ │ ├── README.md
│ │ │ └── utils.py
│ │ ├── atari
│ │ │ └── __init__.py
│ │ ├── classic_control
│ │ │ ├── assets
│ │ │ │ └── clockwise.png
│ │ │ ├── __init__.py
│ │ │ └── pendulum.py
│ │ ├── box2d
│ │ │ ├── __init__.py
│ │ │ └── test_lunar_lander.py
│ │ ├── unittest
│ │ │ └── __init__.py
│ │ ├── toy_text
│ │ │ ├── __init__.py
│ │ │ ├── roulette.py
│ │ │ ├── discrete.py
│ │ │ ├── nchain.py
│ │ │ ├── hotter_colder.py
│ │ │ ├── guessing_game.py
│ │ │ ├── cliffwalking.py
│ │ │ └── blackjack.py
│ │ ├── mujoco
│ │ │ ├── __init__.py
│ │ │ ├── inverted_pendulum.py
│ │ │ ├── swimmer.py
│ │ │ ├── half_cheetah.py
│ │ │ ├── assets
│ │ │ │ ├── inverted_pendulum.xml
│ │ │ │ ├── point.xml
│ │ │ │ ├── inverted_double_pendulum.xml
│ │ │ │ ├── swimmer.xml
│ │ │ │ ├── reacher.xml
│ │ │ │ ├── hopper.xml
│ │ │ │ └── walker2d.xml
│ │ │ ├── walker2d.py
│ │ │ ├── hopper.py
│ │ │ ├── inverted_double_pendulum.py
│ │ │ ├── reacher.py
│ │ │ ├── ant.py
│ │ │ ├── humanoidstandup.py
│ │ │ ├── pusher.py
│ │ │ ├── humanoid.py
│ │ │ ├── thrower.py
│ │ │ └── striker.py
│ │ └── README.md
│ ├── spaces
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ └── test_spaces.py
│ │ ├── __init__.py
│ │ ├── multi_binary.py
│ │ ├── prng.py
│ │ ├── discrete.py
│ │ ├── multi_discrete.py
│ │ ├── tuple_space.py
│ │ ├── box.py
│ │ └── dict_space.py
│ ├── wrappers
│ │ ├── tests
│ │ │ └── __init__.py
│ │ ├── monitoring
│ │ │ ├── __init__.py
│ │ │ ├── tests
│ │ │ │ ├── __init__.py
│ │ │ │ ├── helpers.py
│ │ │ │ └── test_video_recorder.py
│ │ │ └── stats_recorder.py
│ │ ├── __init__.py
│ │ ├── dict.py
│ │ ├── README.md
│ │ └── time_limit.py
│ ├── version.py
│ ├── utils
│ │ ├── reraise_impl_py2.py
│ │ ├── reraise_impl_py3.py
│ │ ├── __init__.py
│ │ ├── tests
│ │ │ ├── test_seeding.py
│ │ │ └── test_atexit.py
│ │ ├── json_utils.py
│ │ ├── colorize.py
│ │ ├── ezpickle.py
│ │ ├── reraise.py
│ │ ├── atomic_write.py
│ │ ├── closer.py
│ │ └── seeding.py
│ ├── tests
│ │ └── test_core.py
│ ├── __init__.py
│ ├── logger.py
│ └── error.py
├── requirements_dev.txt
├── requirements.txt
├── examples
│ ├── scripts
│ │ ├── list_envs
│ │ ├── sim_env
│ │ └── benchmark_runner
│ └── agents
│ │ ├── _policies.py
│ │ ├── random_agent.py
│ │ ├── keyboard_agent.py
│ │ └── cem.py
├── unittest.cfg
├── docs
│ ├── misc.md
│ ├── readme.md
│ ├── environments.md
│ └── agents.md
├── tox.ini
├── Makefile
├── .gitignore
├── bin
│ ├── docker_entrypoint
│ └── render.py
├── .travis.yml
├── CODE_OF_CONDUCT.rst
├── test.dockerfile.14.04
├── test.dockerfile.18.04
├── LICENSE.md
├── test.dockerfile.16.04
├── setup.py
└── scripts
    └── generate_json.py
/.dockerignore:
--------------------------------------------------------------------------------
1 | .tox
2 |
--------------------------------------------------------------------------------
/gym/envs/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/spaces/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/wrappers/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/.get:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/envs/robotics/hand/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/version.py:
--------------------------------------------------------------------------------
1 | VERSION = '0.10.9'
2 |
--------------------------------------------------------------------------------
/gym/wrappers/monitoring/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gym/wrappers/monitoring/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | # Testing
2 | pytest
3 | mock
4 |
5 | -e .[all]
6 |
--------------------------------------------------------------------------------
/gym/envs/atari/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.atari.atari_env import AtariEnv
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.10.4
2 | requests>=2.0
3 | six
4 | pyglet>=1.2.0
5 | scipy==0.17.1
6 |
--------------------------------------------------------------------------------
/gym/utils/reraise_impl_py2.py:
--------------------------------------------------------------------------------
1 | def reraise_impl(e, traceback):
2 | raise e.__class__, e, traceback
3 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/F1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/F1.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/F2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/F2.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/F3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/F3.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/palm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/palm.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/textures/block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/textures/block.png
--------------------------------------------------------------------------------
/gym/envs/classic_control/assets/clockwise.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/classic_control/assets/clockwise.png
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/TH1_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/TH1_z.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/TH2_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/TH2_z.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/TH3_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/TH3_z.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/knuckle.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/knuckle.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/wrist.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/wrist.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/estop_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/estop_link.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/laser_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/laser_link.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/textures/block_hidden.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/textures/block_hidden.png
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/gripper_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/gripper_link.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/forearm_electric.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/forearm_electric.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl
--------------------------------------------------------------------------------
/examples/scripts/list_envs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from gym import envs
3 | envids = [spec.id for spec in envs.registry.all()]
4 | for envid in sorted(envids):
5 | print(envid)
6 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/findmyway/gym/master/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl
--------------------------------------------------------------------------------
/gym/wrappers/__init__.py:
--------------------------------------------------------------------------------
1 | from gym import error
2 | from gym.wrappers.monitor import Monitor
3 | from gym.wrappers.time_limit import TimeLimit
4 | from gym.wrappers.dict import FlattenDictWrapper
5 |
--------------------------------------------------------------------------------
/unittest.cfg:
--------------------------------------------------------------------------------
1 | [log-capture]
2 | always-on = True
3 | clear-handlers = True
4 | date-format = None
5 | filter = -nose
6 | log-level = NOTSET
7 |
8 | [output-buffer]
9 | always-on = True
10 | stderr = True
11 | stdout = True
12 |
--------------------------------------------------------------------------------
/gym/wrappers/monitoring/tests/helpers.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import shutil
3 | import tempfile
4 |
5 | @contextlib.contextmanager
6 | def tempdir():
7 | temp = tempfile.mkdtemp()
8 | try:
9 | yield temp
10 | finally:
11 | shutil.rmtree(temp)
12 |
--------------------------------------------------------------------------------
/gym/utils/reraise_impl_py3.py:
--------------------------------------------------------------------------------
1 | # http://stackoverflow.com/a/33822606 -- `from None` disables Python 3's
2 | # semi-smart exception chaining, which we don't want in this case.
3 | def reraise_impl(e, traceback):
4 | raise e.with_traceback(traceback) from None
5 |
--------------------------------------------------------------------------------
/gym/envs/box2d/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.box2d.lunar_lander import LunarLander
2 | from gym.envs.box2d.lunar_lander import LunarLanderContinuous
3 | from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
4 | from gym.envs.box2d.car_racing import CarRacing
5 |
--------------------------------------------------------------------------------
/gym/envs/unittest/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.unittest.cube_crash import CubeCrash
2 | from gym.envs.unittest.cube_crash import CubeCrashSparse
3 | from gym.envs.unittest.cube_crash import CubeCrashScreenBecomesBlack
4 | from gym.envs.unittest.memorize_digits import MemorizeDigits
5 |
6 |
--------------------------------------------------------------------------------
/docs/misc.md:
--------------------------------------------------------------------------------
1 | # Miscellaneous
2 |
3 | Here we collect tools, libraries, APIs, tutorials, and other resources provided by the community to add value to the gym ecosystem.
4 |
5 | ## OpenAIGym.jl
6 |
7 | A convenience wrapper around the OpenAI Gym for the Julia language: [tbreloff/OpenAIGym.jl](https://github.com/tbreloff/OpenAIGym.jl)
--------------------------------------------------------------------------------
/gym/envs/algorithmic/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.algorithmic.copy_ import CopyEnv
2 | from gym.envs.algorithmic.repeat_copy import RepeatCopyEnv
3 | from gym.envs.algorithmic.duplicated_input import DuplicatedInputEnv
4 | from gym.envs.algorithmic.reverse import ReverseEnv
5 | from gym.envs.algorithmic.reversed_addition import ReversedAdditionEnv
6 |
--------------------------------------------------------------------------------
/gym/envs/classic_control/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.classic_control.cartpole import CartPoleEnv
2 | from gym.envs.classic_control.mountain_car import MountainCarEnv
3 | from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv
4 | from gym.envs.classic_control.pendulum import PendulumEnv
5 | from gym.envs.classic_control.acrobot import AcrobotEnv
6 |
7 |
--------------------------------------------------------------------------------
/gym/spaces/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.spaces.box import Box
2 | from gym.spaces.discrete import Discrete
3 | from gym.spaces.multi_discrete import MultiDiscrete
4 | from gym.spaces.multi_binary import MultiBinary
5 | from gym.spaces.prng import seed, np_random
6 | from gym.spaces.tuple_space import Tuple
7 | from gym.spaces.dict_space import Dict
8 |
9 | __all__ = ["Box", "Discrete", "MultiDiscrete", "MultiBinary", "Tuple", "Dict"]
10 |
--------------------------------------------------------------------------------
/gym/tests/test_core.py:
--------------------------------------------------------------------------------
1 | from gym import core
2 |
3 | class ArgumentEnv(core.Env):
4 | calls = 0
5 |
6 | def __init__(self, arg):
7 | self.calls += 1
8 | self.arg = arg
9 |
10 | def test_env_instantiation():
11 | # This looks like a pretty trivial test, but given our usage of
12 | # __new__, it's worth having.
13 | env = ArgumentEnv('arg')
14 | assert env.arg == 'arg'
15 | assert env.calls == 1
16 |
--------------------------------------------------------------------------------
/gym/__init__.py:
--------------------------------------------------------------------------------
1 | import distutils.version
2 | import os
3 | import sys
4 | import warnings
5 |
6 | from gym import error
7 | from gym.utils import reraise
8 | from gym.version import VERSION as __version__
9 |
10 | from gym.core import Env, GoalEnv, Space, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
11 | from gym.envs import make, spec
12 | from gym import logger
13 |
14 | __all__ = ["Env", "Space", "Wrapper", "make", "spec"]
15 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/copy_.py:
--------------------------------------------------------------------------------
1 | """
2 | Task is to copy content from the input tape to
3 | the output tape. http://arxiv.org/abs/1511.07275
4 | """
5 | from gym.envs.algorithmic import algorithmic_env
6 |
7 | class CopyEnv(algorithmic_env.TapeAlgorithmicEnv):
8 | def __init__(self, base=5, chars=True):
9 | super(CopyEnv, self).__init__(base=base, chars=chars)
10 |
11 | def target_from_input_data(self, input_data):
12 | return input_data
13 |
14 |
--------------------------------------------------------------------------------
/gym/envs/box2d/test_lunar_lander.py:
--------------------------------------------------------------------------------
1 | from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander
2 |
3 | def test_lunar_lander():
4 | _test_lander(LunarLander(), seed=0)
5 |
6 | def test_lunar_lander_continuous():
7 | _test_lander(LunarLanderContinuous(), seed=0)
8 |
9 | def _test_lander(env, seed=None, render=False):
10 | total_reward = demo_heuristic_lander(env, seed=seed, render=render)
11 | assert total_reward > 100
12 |
13 |
14 |
--------------------------------------------------------------------------------
/gym/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """A set of common utilities used within the environments. These are
2 | not intended as API functions, and will not remain stable over time.
3 | """
4 |
5 | # These submodules should not have any import-time dependencies.
6 | # We want this since we use `utils` during our import-time sanity checks
7 | # that verify that our dependencies are actually present.
8 | from .colorize import colorize
9 | from .ezpickle import EzPickle
10 | from .reraise import reraise
11 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | # Tox (http://tox.testrun.org/) is a tool for running tests
2 | # in multiple virtualenvs. This configuration file will run the
3 | # test suite on all supported python versions. To use it, "pip install tox"
4 | # and then run "tox" from this directory.
5 |
6 | [tox]
7 | envlist = py3
8 |
9 | [testenv:py3]
10 | whitelist_externals=make
11 | passenv=DISPLAY TRAVIS*
12 | deps =
13 | pytest
14 | mock
15 | -e .[all]
16 | commands =
17 | pytest {posargs}
18 |
19 |
--------------------------------------------------------------------------------
/gym/utils/tests/test_seeding.py:
--------------------------------------------------------------------------------
1 | from gym import error
2 | from gym.utils import seeding
3 |
4 | def test_invalid_seeds():
5 | for seed in [-1, 'test']:
6 | try:
7 | seeding.np_random(seed)
8 | except error.Error:
9 | pass
10 | else:
11 | assert False, 'Invalid seed {} passed validation'.format(seed)
12 |
13 | def test_valid_seeds():
14 | for seed in [0, 1]:
15 | random, seed1 = seeding.np_random(seed)
16 | assert seed == seed1
17 |
--------------------------------------------------------------------------------
/docs/readme.md:
--------------------------------------------------------------------------------
1 | # Table of Contents
2 |
3 | - [Agents](agents.md) contains a listing of agents compatible with gym environments. Agents facilitate the running of an algorithm against an environment.
4 |
5 | - [Environments](environments.md) lists more environments to run your algorithms against. These do not come prepackaged with the gym.
6 |
7 | - [Miscellaneous](misc.md) is a collection of other value-add tools and utilities. These could be anything from a small convenience lib to a collection of video tutorials or a new language binding.
8 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/reverse.py:
--------------------------------------------------------------------------------
1 | """
2 | Task is to reverse content over the input tape.
3 | http://arxiv.org/abs/1511.07275
4 | """
5 |
6 | from gym.envs.algorithmic import algorithmic_env
7 |
8 | class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv):
9 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
10 | def __init__(self, base=2):
11 | super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1)
12 | self.last = 50
13 |
14 | def target_from_input_data(self, input_str):
15 | return list(reversed(input_str))
16 |
--------------------------------------------------------------------------------
/gym/envs/toy_text/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.toy_text.blackjack import BlackjackEnv
2 | from gym.envs.toy_text.roulette import RouletteEnv
3 | from gym.envs.toy_text.frozen_lake import FrozenLakeEnv
4 | from gym.envs.toy_text.nchain import NChainEnv
5 | from gym.envs.toy_text.hotter_colder import HotterColder
6 | from gym.envs.toy_text.guessing_game import GuessingGame
7 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv
8 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipGeneralizedEnv
9 | from gym.envs.toy_text.cliffwalking import CliffWalkingEnv
10 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: install test
2 |
3 | install:
4 | pip install -r requirements.txt
5 |
6 | base:
7 | docker pull ubuntu:14.04
8 | docker tag ubuntu:14.04 quay.io/openai/gym:base
9 | docker push quay.io/openai/gym:base
10 |
11 | test:
12 | docker build -f test.dockerfile -t quay.io/openai/gym:test .
13 | docker push quay.io/openai/gym:test
14 |
15 | upload:
16 | rm -rf dist
17 | python setup.py sdist
18 | twine upload dist/*
19 |
20 | docker-build:
21 | docker build -t quay.io/openai/gym .
22 |
23 | docker-run:
24 | docker run -ti quay.io/openai/gym bash
25 |
--------------------------------------------------------------------------------
/gym/envs/robotics/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.robotics.fetch_env import FetchEnv
2 | from gym.envs.robotics.fetch.slide import FetchSlideEnv
3 | from gym.envs.robotics.fetch.pick_and_place import FetchPickAndPlaceEnv
4 | from gym.envs.robotics.fetch.push import FetchPushEnv
5 | from gym.envs.robotics.fetch.reach import FetchReachEnv
6 |
7 | from gym.envs.robotics.hand.reach import HandReachEnv
8 | from gym.envs.robotics.hand.manipulate import HandBlockEnv
9 | from gym.envs.robotics.hand.manipulate import HandEggEnv
10 | from gym.envs.robotics.hand.manipulate import HandPenEnv
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.pyc
3 | *.py~
4 | .DS_Store
5 | .cache
6 | .pytest_cache/
7 |
8 | # Setuptools distribution and build folders.
9 | /dist/
10 | /build
11 |
12 | # Virtualenv
13 | /env
14 |
15 | # Python egg metadata, regenerated from source files by setuptools.
16 | /*.egg-info
17 |
18 | *.sublime-project
19 | *.sublime-workspace
20 |
21 | logs/
22 |
23 | .ipynb_checkpoints
24 | ghostdriver.log
25 |
26 | junk
27 | MUJOCO_LOG.txt
28 |
29 | rllab_mujoco
30 |
31 | tutorial/*.html
32 |
33 | # IDE files
34 | .eggs
35 | .tox
36 |
37 | # PyCharm project files
38 | .idea
39 | vizdoom.ini
40 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/repeat_copy.py:
--------------------------------------------------------------------------------
1 | """
2 | Task is to copy content multiple times from the input tape to
3 | the output tape. http://arxiv.org/abs/1511.07275
4 | """
5 | from gym.envs.algorithmic import algorithmic_env
6 |
7 | class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv):
8 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
9 | def __init__(self, base=5):
10 | super(RepeatCopyEnv, self).__init__(base=base, chars=True)
11 | self.last = 50
12 |
13 | def target_from_input_data(self, input_data):
14 | return input_data + list(reversed(input_data)) + input_data
15 |
16 |
--------------------------------------------------------------------------------
/gym/utils/tests/test_atexit.py:
--------------------------------------------------------------------------------
1 | from gym.utils.closer import Closer
2 |
3 | class Closeable(object):
4 | close_called = False
5 | def close(self):
6 | self.close_called = True
7 |
8 | def test_register_unregister():
9 | registry = Closer(atexit_register=False)
10 | c1 = Closeable()
11 | c2 = Closeable()
12 |
13 | assert not c1.close_called
14 | assert not c2.close_called
15 | registry.register(c1)
16 | id2 = registry.register(c2)
17 |
18 | registry.unregister(id2)
19 | registry.close()
20 | assert c1.close_called
21 | assert not c2.close_called
22 |
--------------------------------------------------------------------------------
/bin/docker_entrypoint:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # This script is the entrypoint for our Docker image.
3 |
4 | set -ex
5 |
6 | # Set up display; otherwise rendering will fail
7 | Xvfb -screen 0 1024x768x24 &
8 | export DISPLAY=:0
9 |
10 | # Wait for the file to come up
11 | display=0
12 | file="/tmp/.X11-unix/X$display"
13 | for i in $(seq 1 10); do
14 | if [ -e "$file" ]; then
15 | break
16 | fi
17 |
18 | echo "Waiting for $file to be created (try $i/10)"
19 | sleep "$i"
20 | done
21 | if ! [ -e "$file" ]; then
22 | echo "Timing out: $file was not created"
23 | exit 1
24 | fi
25 |
26 | exec "$@"
27 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: required
2 | language: python
3 | services:
4 | - docker
5 | env:
6 | # - UBUNTU_VER=14.04 - problems with atari-py
7 | - UBUNTU_VER=16.04
8 | - UBUNTU_VER=18.04
9 |
10 | install: "" # so travis doesn't do pip install requirements.txt
11 | script:
12 | - docker build -f test.dockerfile.${UBUNTU_VER} -t gym-test --build-arg MUJOCO_KEY=$MUJOCO_KEY .
13 | - docker run -e MUJOCO_KEY=$MUJOCO_KEY gym-test tox
14 |
15 | deploy:
16 | provider: pypi
17 | username: $TWINE_USERNAME
18 | password: $TWINE_PASSWORD
19 | on:
20 | tags: true
21 | condition: $UBUNTU_VER = 16.04
22 |
--------------------------------------------------------------------------------
/bin/render.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import argparse
3 | import gym
4 |
5 |
6 | parser = argparse.ArgumentParser(description='Renders a Gym environment for quick inspection.')
7 | parser.add_argument('env_id', type=str, help='the ID of the environment to be rendered (e.g. HalfCheetah-v1)')
8 | parser.add_argument('--step', type=int, default=1)
9 | args = parser.parse_args()
10 |
11 | env = gym.make(args.env_id)
12 | env.reset()
13 |
14 | step = 0
15 | while True:
16 | if args.step:
17 | env.step(env.action_space.sample())
18 | env.render()
19 | if step % 10 == 0:
20 | env.reset()
21 | step += 1
22 |
--------------------------------------------------------------------------------
/examples/agents/_policies.py:
--------------------------------------------------------------------------------
1 | # Support code for cem.py
2 |
3 | class BinaryActionLinearPolicy(object):
4 | def __init__(self, theta):
5 | self.w = theta[:-1]
6 | self.b = theta[-1]
7 | def act(self, ob):
8 | y = ob.dot(self.w) + self.b
9 | a = int(y < 0)
10 | return a
11 |
12 | class ContinuousActionLinearPolicy(object):
13 | def __init__(self, theta, n_in, n_out):
14 | assert len(theta) == (n_in + 1) * n_out
15 | self.W = theta[0 : n_in * n_out].reshape(n_in, n_out)
16 | self.b = theta[n_in * n_out : None].reshape(1, n_out)
17 | def act(self, ob):
18 | a = ob.dot(self.W) + self.b
19 | return a
20 |
--------------------------------------------------------------------------------
/gym/utils/json_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def json_encode_np(obj):
4 | """
5 | JSON can't serialize numpy types, so convert to pure python
6 | """
7 | if isinstance(obj, np.ndarray):
8 | return list(obj)
9 | elif isinstance(obj, np.float32):
10 | return float(obj)
11 | elif isinstance(obj, np.float64):
12 | return float(obj)
13 | elif isinstance(obj, np.int8):
14 | return int(obj)
15 | elif isinstance(obj, np.int16):
16 | return int(obj)
17 | elif isinstance(obj, np.int32):
18 | return int(obj)
19 | elif isinstance(obj, np.int64):
20 | return int(obj)
21 | else:
22 | return obj
23 |
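24 | # Example (illustrative): pass this function as the `default` hook so numpy
25 | # scalars and arrays are converted before JSON encoding:
26 | #   import json
27 | #   json.dumps({"reward": np.float32(1.5)}, default=json_encode_np)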
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.rst:
--------------------------------------------------------------------------------
1 | OpenAI Gym is dedicated to providing a harassment-free experience for
2 | everyone, regardless of gender, gender identity and expression, sexual
3 | orientation, disability, physical appearance, body size, age, race, or
4 | religion. We do not tolerate harassment of participants in any form.
5 |
6 | This code of conduct applies to all OpenAI Gym spaces (including Gist
7 | comments) both online and off. Anyone who violates this code of
8 | conduct may be sanctioned or expelled from these spaces at the
9 | discretion of the OpenAI team.
10 |
11 | We may add additional rules over time, which will be made clearly
12 | available to participants. Participants are responsible for knowing
13 | and abiding by these rules.
14 |
--------------------------------------------------------------------------------
/gym/spaces/multi_binary.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | class MultiBinary(gym.Space):
5 | def __init__(self, n):
6 | self.n = n
7 | gym.Space.__init__(self, (self.n,), np.int8)
8 |
9 | def sample(self):
10 | return gym.spaces.np_random.randint(low=0, high=2, size=self.n).astype(self.dtype)
11 |
12 | def contains(self, x):
13 | return ((x==0) | (x==1)).all()
14 |
15 | def to_jsonable(self, sample_n):
16 | return np.array(sample_n).tolist()
17 |
18 | def from_jsonable(self, sample_n):
19 | return [np.asarray(sample) for sample in sample_n]
20 |
21 | def __repr__(self):
22 | return "MultiBinary({})".format(self.n)
23 |
24 | def __eq__(self, other):
25 | return self.n == other.n
26 |
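27 | # Example (illustrative): MultiBinary(4) describes four independent binary
28 | # flags; sample() returns something like array([0, 1, 1, 0], dtype=int8).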
--------------------------------------------------------------------------------
/gym/spaces/prng.py:
--------------------------------------------------------------------------------
1 | import numpy
2 |
3 | np_random = numpy.random.RandomState()
4 |
5 | def seed(seed=None):
6 | """Seed the common numpy.random.RandomState used in spaces
7 |
8 | CF
9 | https://github.com/openai/gym/commit/58e6aa95e5af2c738557431f812abb81c505a7cf#commitcomment-17669277
10 | for some details about why we seed the spaces separately from the
11 | envs, but tl;dr is that it's pretty uncommon for them to be used
12 | within an actual algorithm, and the code becomes simpler to just
13 | use this common numpy.random.RandomState.
14 | """
15 | np_random.seed(seed)
16 |
17 | # This numpy.random.RandomState gets used in all spaces for their
18 | # 'sample' method. It's not really expected that people will be using
19 | # these in their algorithms.
20 | seed(0)
21 |
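22 | # Example (illustrative): seeding this module makes `Space.sample()` calls
23 | # reproducible across runs:
24 | #
25 | #   from gym import spaces
26 | #   spaces.seed(42)
27 | #   spaces.Discrete(4).sample()  # deterministic given the seed above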
--------------------------------------------------------------------------------
/gym/logger.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from gym.utils import colorize
4 |
5 | DEBUG = 10
6 | INFO = 20
7 | WARN = 30
8 | ERROR = 40
9 | DISABLED = 50
10 |
11 | MIN_LEVEL = 30
12 |
13 | def set_level(level):
14 | """
15 | Set logging threshold on current logger.
16 | """
17 | global MIN_LEVEL
18 | MIN_LEVEL = level
19 |
20 | def debug(msg, *args):
21 | if MIN_LEVEL <= DEBUG:
22 | print('%s: %s'%('DEBUG', msg % args))
23 |
24 | def info(msg, *args):
25 | if MIN_LEVEL <= INFO:
26 | print('%s: %s'%('INFO', msg % args))
27 |
28 | def warn(msg, *args):
29 | if MIN_LEVEL <= WARN:
30 | warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
31 |
32 | def error(msg, *args):
33 | if MIN_LEVEL <= ERROR:
34 | print(colorize('%s: %s'%('ERROR', msg % args), 'red'))
35 |
36 | # DEPRECATED:
37 | setLevel = set_level
38 |
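39 | # Example (illustrative): raise the threshold to silence everything below
40 | # ERROR, e.g. to hide deprecation warnings:
41 | #   import gym
42 | #   gym.logger.set_level(gym.logger.ERROR)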
--------------------------------------------------------------------------------
/gym/envs/mujoco/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.mujoco.mujoco_env import MujocoEnv
2 | # ^^^^^ so that user gets the correct error
3 | # message if mujoco is not installed correctly
4 | from gym.envs.mujoco.ant import AntEnv
5 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
6 | from gym.envs.mujoco.hopper import HopperEnv
7 | from gym.envs.mujoco.walker2d import Walker2dEnv
8 | from gym.envs.mujoco.humanoid import HumanoidEnv
9 | from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
10 | from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
11 | from gym.envs.mujoco.reacher import ReacherEnv
12 | from gym.envs.mujoco.swimmer import SwimmerEnv
13 | from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv
14 | from gym.envs.mujoco.pusher import PusherEnv
15 | from gym.envs.mujoco.thrower import ThrowerEnv
16 | from gym.envs.mujoco.striker import StrikerEnv
17 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/reach.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'reach.xml')
8 |
9 |
10 | class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.4049,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | }
17 | fetch_env.FetchEnv.__init__(
18 | self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20,
19 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
20 | obj_range=0.15, target_range=0.15, distance_threshold=0.05,
21 | initial_qpos=initial_qpos, reward_type=reward_type)
22 | utils.EzPickle.__init__(self)
23 |
--------------------------------------------------------------------------------
/gym/spaces/discrete.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import gym
3 |
4 | class Discrete(gym.Space):
5 | """
6 | {0,1,...,n-1}
7 |
8 | Example usage:
9 | self.observation_space = spaces.Discrete(2)
10 | """
11 | def __init__(self, n):
12 | self.n = n
13 | gym.Space.__init__(self, (), np.int64)
14 |
15 | def sample(self):
16 | return gym.spaces.np_random.randint(self.n)
17 |
18 | def contains(self, x):
19 | if isinstance(x, int):
20 | as_int = x
21 | elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.kind in np.typecodes['AllInteger'] and x.shape == ()):
22 | as_int = int(x)
23 | else:
24 | return False
25 | return as_int >= 0 and as_int < self.n
26 |
27 | def __repr__(self):
28 | return "Discrete(%d)" % self.n
29 |
30 | def __eq__(self, other):
31 | return self.n == other.n
32 |
--------------------------------------------------------------------------------
/gym/envs/tests/spec_list.py:
--------------------------------------------------------------------------------
1 | from gym import envs, logger
2 | import os
3 |
4 | def should_skip_env_spec_for_tests(spec):
5 | # We skip tests for envs that require dependencies or are otherwise
6 | # troublesome to run frequently
7 | ep = spec._entry_point
8 | # Skip mujoco tests for pull request CI
9 | skip_mujoco = not (os.environ.get('MUJOCO_KEY'))
10 | if skip_mujoco and (ep.startswith('gym.envs.mujoco:') or ep.startswith('gym.envs.robotics:')):
11 | return True
12 | if ( 'GoEnv' in ep or
13 | 'HexEnv' in ep or
14 | (ep.startswith("gym.envs.atari") and not spec.id.startswith("Pong") and not spec.id.startswith("Seaquest"))
15 | ):
16 | logger.warn("Skipping tests for env {}".format(ep))
17 | return True
18 | return False
19 |
20 | spec_list = [spec for spec in sorted(envs.registry.all(), key=lambda x: x.id) if spec._entry_point is not None and not should_skip_env_spec_for_tests(spec)]
21 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/push.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'push.xml')
8 |
9 |
10 | class FetchPushEnv(fetch_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | fetch_env.FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0,
21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/wrappers/dict.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 |
5 | __all__ = ['FlattenDictWrapper']
6 |
7 |
8 | class FlattenDictWrapper(gym.ObservationWrapper):
9 | """Flattens selected keys of a Dict observation space into
10 | an array.
11 | """
12 | def __init__(self, env, dict_keys):
13 | super(FlattenDictWrapper, self).__init__(env)
14 | self.dict_keys = dict_keys
15 |
16 | # Figure out observation_space dimension.
17 | size = 0
18 | for key in dict_keys:
19 | shape = self.env.observation_space.spaces[key].shape
20 | size += np.prod(shape)
21 | self.observation_space = gym.spaces.Box(-np.inf, np.inf, shape=(size,), dtype='float32')
22 |
23 | def observation(self, observation):
24 | assert isinstance(observation, dict)
25 | obs = []
26 | for key in self.dict_keys:
27 | obs.append(observation[key].ravel())
28 | return np.concatenate(obs)
29 |
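30 | # Example (illustrative; assumes a goal-based env with a Dict observation
31 | # space, e.g. the robotics Fetch environments):
32 | #   env = FlattenDictWrapper(gym.make('FetchReach-v1'),
33 | #                            dict_keys=['observation', 'desired_goal'])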
--------------------------------------------------------------------------------
/gym/envs/algorithmic/duplicated_input.py:
--------------------------------------------------------------------------------
1 | """
2 | Task is to return every nth character from the input tape.
3 | http://arxiv.org/abs/1511.07275
4 | """
5 | from __future__ import division
6 | from gym.envs.algorithmic import algorithmic_env
7 |
8 | class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv):
9 | def __init__(self, duplication=2, base=5):
10 | self.duplication = duplication
11 | super(DuplicatedInputEnv, self).__init__(base=base, chars=True)
12 |
13 | def generate_input_data(self, size):
14 | res = []
15 | if size < self.duplication:
16 | size = self.duplication
17 | for i in range(size//self.duplication):
18 | char = self.np_random.randint(self.base)
19 | for _ in range(self.duplication):
20 | res.append(char)
21 | return res
22 |
23 | def target_from_input_data(self, input_data):
24 | return [input_data[i] for i in range(0, len(input_data), self.duplication)]
25 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/pick_and_place.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'pick_and_place.xml')
8 |
9 |
10 | class FetchPickAndPlaceEnv(fetch_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | fetch_env.FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20,
20 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/slide.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 |
4 | from gym import utils
5 | from gym.envs.robotics import fetch_env
6 |
7 |
8 | # Ensure we get the path separator correct on windows
9 | MODEL_XML_PATH = os.path.join('fetch', 'slide.xml')
10 |
11 |
12 | class FetchSlideEnv(fetch_env.FetchEnv, utils.EzPickle):
13 | def __init__(self, reward_type='sparse'):
14 | initial_qpos = {
15 | 'robot0:slide0': 0.05,
16 | 'robot0:slide1': 0.48,
17 | 'robot0:slide2': 0.0,
18 | 'object0:joint': [1.7, 1.1, 0.4, 1., 0., 0., 0.],
19 | }
20 | fetch_env.FetchEnv.__init__(
21 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
22 | gripper_extra_height=-0.02, target_in_the_air=False, target_offset=np.array([0.4, 0.0, 0.0]),
23 | obj_range=0.1, target_range=0.3, distance_threshold=0.05,
24 | initial_qpos=initial_qpos, reward_type=reward_type)
25 | utils.EzPickle.__init__(self)
26 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/reach.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/utils/colorize.py:
--------------------------------------------------------------------------------
1 | """A set of common utilities used within the environments. These are
2 | not intended as API functions, and will not remain stable over time.
3 | """
4 |
5 | color2num = dict(
6 | gray=30,
7 | red=31,
8 | green=32,
9 | yellow=33,
10 | blue=34,
11 | magenta=35,
12 | cyan=36,
13 | white=37,
14 | crimson=38
15 | )
16 |
17 |
18 | def colorize(string, color, bold=False, highlight = False):
19 | """Return string surrounded by appropriate terminal color codes to
20 | print colorized text. Valid colors: gray, red, green, yellow,
21 | blue, magenta, cyan, white, crimson
22 | """
23 |
24 | # Import six here so that `utils` has no import-time dependencies.
25 | # We want this since we use `utils` during our import-time sanity checks
26 | # that verify that our dependencies (including six) are actually present.
27 | import six
28 |
29 | attr = []
30 | num = color2num[color]
31 | if highlight: num += 10
32 | attr.append(six.u(str(num)))
33 | if bold: attr.append(six.u('1'))
34 | attrs = six.u(';').join(attr)
35 | return six.u('\x1b[%sm%s\x1b[0m') % (attrs, string)
36 |
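37 | # Example (illustrative):
38 | #   print(colorize("training complete", "green", bold=True))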
--------------------------------------------------------------------------------
/gym/utils/ezpickle.py:
--------------------------------------------------------------------------------
1 | class EzPickle(object):
2 | """Objects that are pickled and unpickled via their constructor
3 | arguments.
4 |
5 | Example usage:
6 |
7 | class Dog(Animal, EzPickle):
8 | def __init__(self, furcolor, tailkind="bushy"):
9 | Animal.__init__(self)
10 | EzPickle.__init__(self, furcolor, tailkind)
11 | ...
12 |
13 | When this object is unpickled, a new Dog will be constructed by passing the provided
14 | furcolor and tailkind into the constructor. However, philosophers are still not sure
15 | whether it is still the same dog.
16 |
17 | This is generally needed only for environments which wrap C/C++ code, such as MuJoCo
18 | and Atari.
19 | """
20 | def __init__(self, *args, **kwargs):
21 | self._ezpickle_args = args
22 | self._ezpickle_kwargs = kwargs
23 | def __getstate__(self):
24 | return {"_ezpickle_args" : self._ezpickle_args, "_ezpickle_kwargs": self._ezpickle_kwargs}
25 | def __setstate__(self, d):
26 | out = type(self)(*d["_ezpickle_args"], **d["_ezpickle_kwargs"])
27 | self.__dict__.update(out.__dict__)
28 |
--------------------------------------------------------------------------------
/gym/wrappers/README.md:
--------------------------------------------------------------------------------
1 | # Wrappers
2 |
3 | Wrappers are used to transform an environment in a modular way:
4 |
5 | ```
6 | env = gym.make('Pong-v0')
7 | env = MyWrapper(env)
8 | ```
9 |
10 | Note that we may later restructure any of the files in this directory,
11 | but will keep the wrappers available at the top level of the wrappers
12 | package. So, for example, you should access `MyWrapper` as follows:
13 |
14 | ```
15 | # Will be supported in future releases
16 | from gym.wrappers import MyWrapper
17 | ```
18 |
19 | ## Quick tips for writing your own wrapper
20 |
21 | - Don't forget to call `super(class_name, self).__init__(env)` if you override the wrapper's `__init__` function
22 | - You can access the inner environment with `self.unwrapped`
23 | - You can access the previous layer using `self.env`
24 | - The variables `metadata`, `action_space`, `observation_space`, `reward_range`, and `spec` are copied to `self` from the previous layer
25 | - Create a wrapped function for at least one of the following: `__init__(self, env)`, `_step`, `_reset`, `_render`, `_close`, or `_seed`
26 | - Your layered function should take its input from the previous layer (`self.env`) and/or the inner layer (`self.unwrapped`)
27 |
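28 | ## Example
29 |
30 | Putting the tips above together, here is a minimal sketch of a custom wrapper
31 | (the step-counting behaviour is purely illustrative and not part of gym; on
32 | older gym versions you may need the underscored `_step`/`_reset` variants
33 | mentioned above):
34 |
35 | ```
36 | import gym
37 |
38 | class MyWrapper(gym.Wrapper):
39 |     def __init__(self, env):
40 |         super(MyWrapper, self).__init__(env)
41 |         self.num_steps = 0
42 |
43 |     def step(self, action):
44 |         self.num_steps += 1
45 |         return self.env.step(action)
46 |
47 |     def reset(self, **kwargs):
48 |         self.num_steps = 0
49 |         return self.env.reset(**kwargs)
50 |
51 | env = MyWrapper(gym.make('Pong-v0'))
52 | ```
53 |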
--------------------------------------------------------------------------------
/gym/envs/mujoco/inverted_pendulum.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | utils.EzPickle.__init__(self)
8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
9 |
10 | def step(self, a):
11 | reward = 1.0
12 | self.do_simulation(a, self.frame_skip)
13 | ob = self._get_obs()
14 | notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2)
15 | done = not notdone
16 | return ob, reward, done, {}
17 |
18 | def reset_model(self):
19 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
20 | qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
21 | self.set_state(qpos, qvel)
22 | return self._get_obs()
23 |
24 | def _get_obs(self):
25 | return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
26 |
27 | def viewer_setup(self):
28 | v = self.viewer
29 | v.cam.trackbodyid = 0
30 | v.cam.distance = self.model.stat.extent
31 |
--------------------------------------------------------------------------------
/gym/spaces/multi_discrete.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | class MultiDiscrete(gym.Space):
5 | def __init__(self, nvec):
6 | """
7 | nvec: vector of counts of each categorical variable
8 | """
9 | assert (np.array(nvec) > 0).all(), 'nvec (counts) have to be positive'
10 | self.nvec = np.asarray(nvec, dtype=np.uint32)
11 | gym.Space.__init__(self, self.nvec.shape, np.uint32)
12 |
13 | def sample(self):
14 | return (gym.spaces.np_random.random_sample(self.nvec.shape) * self.nvec).astype(self.dtype)
15 |
16 | def contains(self, x):
17 | # if nvec is uint32 and space dtype is uint32, then 0 <= x < self.nvec guarantees that x
18 | # is within correct bounds for space dtype (even though x does not have to be unsigned)
19 | return (0 <= x).all() and (x < self.nvec).all()
20 |
21 | def to_jsonable(self, sample_n):
22 | return [sample.tolist() for sample in sample_n]
23 |
24 | def from_jsonable(self, sample_n):
25 | return np.array(sample_n)
26 |
27 | def __repr__(self):
28 | return "MultiDiscrete({})".format(self.nvec)
29 |
30 | def __eq__(self, other):
31 | return np.all(self.nvec == other.nvec)
32 |
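A brief usage sketch of the space defined above (not part of the file): with `nvec=[3, 5]`, samples are integer vectors whose first entry lies in {0, 1, 2} and whose second lies in {0, ..., 4}.

```python
import numpy as np
from gym import spaces

space = spaces.MultiDiscrete([3, 5])
x = space.sample()                            # integer vector, one entry per component
assert space.contains(x)                      # each entry i satisfies 0 <= x[i] < nvec[i]
assert not space.contains(np.array([3, 0]))   # 3 is out of range for the first component
```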
--------------------------------------------------------------------------------
/gym/envs/mujoco/swimmer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4)
8 | utils.EzPickle.__init__(self)
9 |
10 | def step(self, a):
11 | ctrl_cost_coeff = 0.0001
12 | xposbefore = self.sim.data.qpos[0]
13 | self.do_simulation(a, self.frame_skip)
14 | xposafter = self.sim.data.qpos[0]
15 | reward_fwd = (xposafter - xposbefore) / self.dt
16 | reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
17 | reward = reward_fwd + reward_ctrl
18 | ob = self._get_obs()
19 | return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
20 |
21 | def _get_obs(self):
22 | qpos = self.sim.data.qpos
23 | qvel = self.sim.data.qvel
24 | return np.concatenate([qpos.flat[2:], qvel.flat])
25 |
26 | def reset_model(self):
27 | self.set_state(
28 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
29 | self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv)
30 | )
31 | return self._get_obs()
32 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/reversed_addition.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import numpy as np
3 | from gym.envs.algorithmic import algorithmic_env
4 |
5 | class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
6 | def __init__(self, rows=2, base=3):
7 | super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False)
8 |
9 | def target_from_input_data(self, input_strings):
10 | carry = 0
11 | target = []
12 | for digits in input_strings:
13 | total = sum(digits) + carry
14 | target.append(total % self.base)
15 | carry = total // self.base
16 |
17 | if carry > 0:
18 | target.append(carry)
19 | return target
20 |
21 | @property
22 | def time_limit(self):
23 | # Quirk preserved for the sake of consistency: add the length of the input
24 | # rather than the length of the desired output (which may differ if there's
25 | # an extra carried digit).
26 | # TODO: It seems like this time limit is so strict as to make Addition3-v0
27 | # unsolvable, since agents aren't even given enough time steps to look at
28 | # all the digits. (The solutions on the scoreboard seem to only work by
29 | # save-scumming.)
30 | return self.input_width*2 + 4
31 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/half_cheetah.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5)
8 | utils.EzPickle.__init__(self)
9 |
10 | def step(self, action):
11 | xposbefore = self.sim.data.qpos[0]
12 | self.do_simulation(action, self.frame_skip)
13 | xposafter = self.sim.data.qpos[0]
14 | ob = self._get_obs()
15 | reward_ctrl = - 0.1 * np.square(action).sum()
16 | reward_run = (xposafter - xposbefore)/self.dt
17 | reward = reward_ctrl + reward_run
18 | done = False
19 | return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
20 |
21 | def _get_obs(self):
22 | return np.concatenate([
23 | self.sim.data.qpos.flat[1:],
24 | self.sim.data.qvel.flat,
25 | ])
26 |
27 | def reset_model(self):
28 | qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
29 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
30 | self.set_state(qpos, qvel)
31 | return self._get_obs()
32 |
33 | def viewer_setup(self):
34 | self.viewer.cam.distance = self.model.stat.extent * 0.5
35 |
--------------------------------------------------------------------------------
/gym/spaces/tuple_space.py:
--------------------------------------------------------------------------------
1 | import gym
2 |
3 | class Tuple(gym.Space):
4 | """
5 | A tuple (i.e., product) of simpler spaces
6 |
7 | Example usage:
8 | self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
9 | """
10 | def __init__(self, spaces):
11 | self.spaces = spaces
12 | gym.Space.__init__(self, None, None)
13 |
14 | def sample(self):
15 | return tuple([space.sample() for space in self.spaces])
16 |
17 | def contains(self, x):
18 | if isinstance(x, list):
19 | x = tuple(x) # Promote list to tuple for contains check
20 | return isinstance(x, tuple) and len(x) == len(self.spaces) and all(
21 | space.contains(part) for (space,part) in zip(self.spaces,x))
22 |
23 | def __repr__(self):
24 | return "Tuple(" + ", ". join([str(s) for s in self.spaces]) + ")"
25 |
26 | def to_jsonable(self, sample_n):
27 | # serialize as list-repr of tuple of vectors
28 | return [space.to_jsonable([sample[i] for sample in sample_n]) \
29 | for i, space in enumerate(self.spaces)]
30 |
31 | def from_jsonable(self, sample_n):
32 | return [sample for sample in zip(*[space.from_jsonable(sample_n[i]) for i, space in enumerate(self.spaces)])]
33 |
34 | def __eq__(self, other):
35 | return self.spaces == other.spaces
36 |
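A short usage sketch following the docstring's example (not part of the file): sampling yields one element per sub-space, and lists are accepted by `contains` via the promotion shown above.

```python
from gym import spaces

space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
x = space.sample()               # e.g. (1, 2): one sample per sub-space
assert space.contains(x)
assert space.contains(list(x))   # lists are promoted to tuples for the check
```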
--------------------------------------------------------------------------------
/test.dockerfile.14.04:
--------------------------------------------------------------------------------
1 | # A Dockerfile that sets up a full Gym install with test dependencies
2 | FROM ubuntu:14.04
3 | # Note that latest version of mujoco-py, 1.5, does not play nicely with ubuntu 14.04 -
4 | # requires patchelf system package not available on 14.04
5 |
6 | # Install keyboard-configuration separately to avoid travis hanging waiting for keyboard selection
7 | RUN \
8 | apt-get -y update && \
9 | apt-get install -y keyboard-configuration && \
10 |
11 | apt-get install -y \
12 | python-setuptools \
13 | python-pip \
14 | python3-dev \
15 | libjpeg-dev \
16 | cmake \
17 | swig \
18 | python-pyglet \
19 | python3-opengl \
20 | libboost-all-dev \
21 | libsdl2-2.0.0 \
22 | libsdl2-dev \
23 | libglu1-mesa \
24 | libglu1-mesa-dev \
25 | libgles2-mesa-dev \
26 | xvfb \
27 | libav-tools \
28 | freeglut3 \
29 | wget \
30 | unzip && \
31 |
32 | apt-get clean && \
33 | rm -rf /var/lib/apt/lists/* && \
34 | pip install tox
35 |
36 |
37 | ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mjpro150/bin
38 |
39 | # install dependencies
40 | COPY . /usr/local/gym/
41 | RUN cd /usr/local/gym && \
42 | tox --notest
43 |
44 | WORKDIR /usr/local/gym/
45 | ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"]
46 | CMD ["tox"]
47 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/push.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/test.dockerfile.18.04:
--------------------------------------------------------------------------------
1 | # A Dockerfile that sets up a full Gym install with test dependencies
2 | FROM ubuntu:18.04
3 |
4 | # Install keyboard-configuration separately to avoid travis hanging waiting for keyboard selection
5 | RUN \
6 | apt -y update && \
7 | apt install -y keyboard-configuration && \
8 |
9 | apt install -y \
10 | python-setuptools \
11 | python-pip \
12 | python3-dev \
13 | python-pyglet \
14 | python3-opengl \
15 | libjpeg-dev \
16 | libboost-all-dev \
17 | libsdl2-dev \
18 | libosmesa6-dev \
19 | patchelf \
20 | ffmpeg \
21 | xvfb \
22 | wget \
23 | unzip && \
24 |
25 | apt clean && \
26 | rm -rf /var/lib/apt/lists/* && \
27 | pip install tox && \
28 |
29 | # Download mujoco
30 | mkdir /root/.mujoco && \
31 | cd /root/.mujoco && \
32 | wget https://www.roboti.us/download/mjpro150_linux.zip && \
33 | unzip mjpro150_linux.zip
34 |
35 | ARG MUJOCO_KEY
36 | ENV MUJOCO_KEY=$MUJOCO_KEY
37 | ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mjpro150/bin
38 | RUN echo $MUJOCO_KEY | base64 --decode > /root/.mujoco/mjkey.txt
39 |
40 | # install dependencies
41 | COPY . /usr/local/gym/
42 | RUN cd /usr/local/gym && \
43 | tox --notest
44 |
45 | WORKDIR /usr/local/gym/
46 | ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"]
47 | CMD ["tox"]
48 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/inverted_pendulum.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/slide.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/utils/reraise.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | # We keep the actual reraising in different modules, since the
4 | # reraising code uses syntax that is valid in only one of Python 2 or 3.
5 | if sys.version_info[0] < 3:
6 | from .reraise_impl_py2 import reraise_impl #pylint: disable=E0401
7 | else:
8 | from .reraise_impl_py3 import reraise_impl
9 |
10 | def reraise(prefix=None, suffix=None):
11 | old_exc_type, old_exc_value, traceback = sys.exc_info()
12 | if old_exc_value is None:
13 | old_exc_value = old_exc_type()
14 |
15 | e = ReraisedException(old_exc_value, prefix, suffix)
16 |
17 | reraise_impl(e, traceback)
18 |
19 | # http://stackoverflow.com/a/13653312
20 | def full_class_name(o):
21 | module = o.__class__.__module__
22 | if module is None or module == str.__class__.__module__:
23 | return o.__class__.__name__
24 | return module + '.' + o.__class__.__name__
25 |
26 | class ReraisedException(Exception):
27 | def __init__(self, old_exc, prefix, suffix):
28 | self.old_exc = old_exc
29 | self.prefix = prefix
30 | self.suffix = suffix
31 |
32 | def __str__(self):
33 | klass = self.old_exc.__class__
34 |
35 | orig = "%s: %s" % (full_class_name(self.old_exc), klass.__str__(self.old_exc))
36 | prefixpart = suffixpart = ''
37 | if self.prefix is not None:
38 | prefixpart = self.prefix + "\n"
39 | if self.suffix is not None:
40 | suffixpart = "\n\n" + self.suffix
41 | return "%sThe original exception was:\n\n%s%s" % (prefixpart, orig, suffixpart)
42 |
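A hedged usage sketch (the imported module name and hint text are invented for illustration): `reraise` is intended to be called inside an `except` block, so the original traceback is preserved while extra context is attached.

```python
from gym.utils.reraise import reraise

try:
    import some_optional_dependency  # hypothetical module, not a real gym requirement
except ImportError:
    # Re-raises as ReraisedException, keeping the original traceback plus the suffix text.
    reraise(suffix="(Hint: install the optional dependency to use this feature.)")
```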
--------------------------------------------------------------------------------
/gym/envs/mujoco/walker2d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 |
7 | def __init__(self):
8 | mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4)
9 | utils.EzPickle.__init__(self)
10 |
11 | def step(self, a):
12 | posbefore = self.sim.data.qpos[0]
13 | self.do_simulation(a, self.frame_skip)
14 | posafter, height, ang = self.sim.data.qpos[0:3]
15 | alive_bonus = 1.0
16 | reward = ((posafter - posbefore) / self.dt)
17 | reward += alive_bonus
18 | reward -= 1e-3 * np.square(a).sum()
19 | done = not (height > 0.8 and height < 2.0 and
20 | ang > -1.0 and ang < 1.0)
21 | ob = self._get_obs()
22 | return ob, reward, done, {}
23 |
24 | def _get_obs(self):
25 | qpos = self.sim.data.qpos
26 | qvel = self.sim.data.qvel
27 | return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()
28 |
29 | def reset_model(self):
30 | self.set_state(
31 | self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq),
32 | self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
33 | )
34 | return self._get_obs()
35 |
36 | def viewer_setup(self):
37 | self.viewer.cam.trackbodyid = 2
38 | self.viewer.cam.distance = self.model.stat.extent * 0.5
39 | self.viewer.cam.lookat[2] = 1.15
40 | self.viewer.cam.elevation = -20
41 |
--------------------------------------------------------------------------------
/gym/envs/toy_text/roulette.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import spaces
3 | from gym.utils import seeding
4 |
5 |
6 | class RouletteEnv(gym.Env):
7 | """Simple roulette environment
8 |
9 | The roulette wheel has 37 spots. If the bet is 0 and a 0 comes up,
10 | you win a reward of 35. If the parity of your bet matches the parity
11 | of the spin, you win 1. Otherwise you receive a reward of -1.
12 |
13 | The long run reward for playing 0 should be -1/37 for any state
14 |
15 | The last action (38) stops the rollout for a return of 0 (walking away)
16 | """
17 | def __init__(self, spots=37):
18 | self.n = spots + 1
19 | self.action_space = spaces.Discrete(self.n)
20 | self.observation_space = spaces.Discrete(1)
21 | self.seed()
22 |
23 | def seed(self, seed=None):
24 | self.np_random, seed = seeding.np_random(seed)
25 | return [seed]
26 |
27 | def step(self, action):
28 | assert self.action_space.contains(action)
29 | if action == self.n - 1:
30 | # observation, reward, done, info
31 | return 0, 0, True, {}
32 |
33 | # N.B. np.random.randint draws from [A, B) while random.randint draws from [A,B]
34 | val = self.np_random.randint(0, self.n - 1)
35 | if val == action == 0:
36 | reward = self.n - 2.0
37 | elif val != 0 and action != 0 and val % 2 == action % 2:
38 | reward = 1.0
39 | else:
40 | reward = -1.0
41 | return 0, reward, False, {}
42 |
43 | def reset(self):
44 | return 0
45 |
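A quick arithmetic check of the docstring's long-run claim (an editorial note, not code from the file): with the stated payout of 35 for correctly betting 0 on a fair 37-spot wheel, always betting 0 loses 1/37 per spin on average.

```python
# P(hit 0) = 1/37 with payout 35; otherwise lose 1.
ev = (1.0 / 37) * 35 + (36.0 / 37) * (-1)
print(ev)  # -0.02702... == -1/37
# Observation: the code's payout is `self.n - 2.0` (36 for the default spots=37),
# under which the same calculation gives 0 rather than -1/37.
```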
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # gym
2 |
3 | The MIT License
4 |
5 | Copyright (c) 2016 OpenAI (https://openai.com)
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy
8 | of this software and associated documentation files (the "Software"), to deal
9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be included in
15 | all copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | THE SOFTWARE.
24 |
25 | # MuJoCo models
26 | This work is derived from [MuJoCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license:
27 | ```
28 | This file is part of MuJoCo.
29 | Copyright 2009-2015 Roboti LLC.
30 | Mujoco :: Advanced physics simulation engine
31 | Source : www.roboti.us
32 | Version : 1.31
33 | Released : 23Apr16
34 | Author :: Vikash Kumar
35 | Contacts : kumar@roboti.us
36 | ```
37 |
--------------------------------------------------------------------------------
/gym/envs/tests/test_registration.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from gym import error, envs
3 | from gym.envs import registration
4 | from gym.envs.classic_control import cartpole
5 |
6 | def test_make():
7 | env = envs.make('CartPole-v0')
8 | assert env.spec.id == 'CartPole-v0'
9 | assert isinstance(env.unwrapped, cartpole.CartPoleEnv)
10 |
11 | def test_make_deprecated():
12 | try:
13 | envs.make('Humanoid-v0')
14 | except error.Error:
15 | pass
16 | else:
17 | assert False
18 |
19 | def test_spec():
20 | spec = envs.spec('CartPole-v0')
21 | assert spec.id == 'CartPole-v0'
22 |
23 | def test_missing_lookup():
24 | registry = registration.EnvRegistry()
25 | registry.register(id='Test-v0', entry_point=None)
26 | registry.register(id='Test-v15', entry_point=None)
27 | registry.register(id='Test-v9', entry_point=None)
28 | registry.register(id='Other-v100', entry_point=None)
29 | try:
30 | registry.spec('Test-v1') # must match an env name but not the version above
31 | except error.DeprecatedEnv:
32 | pass
33 | else:
34 | assert False
35 |
36 | try:
37 | registry.spec('Unknown-v1')
38 | except error.UnregisteredEnv:
39 | pass
40 | else:
41 | assert False
42 |
43 | def test_malformed_lookup():
44 | registry = registration.EnvRegistry()
45 | try:
46 | registry.spec(u'“Breakout-v0”')
47 | except error.Error as e:
48 | assert 'malformed environment ID' in '{}'.format(e), 'Unexpected message: {}'.format(e)
49 | else:
50 | assert False
51 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/hopper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4)
8 | utils.EzPickle.__init__(self)
9 |
10 | def step(self, a):
11 | posbefore = self.sim.data.qpos[0]
12 | self.do_simulation(a, self.frame_skip)
13 | posafter, height, ang = self.sim.data.qpos[0:3]
14 | alive_bonus = 1.0
15 | reward = (posafter - posbefore) / self.dt
16 | reward += alive_bonus
17 | reward -= 1e-3 * np.square(a).sum()
18 | s = self.state_vector()
19 | done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and
20 | (height > .7) and (abs(ang) < .2))
21 | ob = self._get_obs()
22 | return ob, reward, done, {}
23 |
24 | def _get_obs(self):
25 | return np.concatenate([
26 | self.sim.data.qpos.flat[1:],
27 | np.clip(self.sim.data.qvel.flat, -10, 10)
28 | ])
29 |
30 | def reset_model(self):
31 | qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq)
32 | qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
33 | self.set_state(qpos, qvel)
34 | return self._get_obs()
35 |
36 | def viewer_setup(self):
37 | self.viewer.cam.trackbodyid = 2
38 | self.viewer.cam.distance = self.model.stat.extent * 0.75
39 | self.viewer.cam.lookat[2] = 1.15
40 | self.viewer.cam.elevation = -20
41 |
--------------------------------------------------------------------------------
/test.dockerfile.16.04:
--------------------------------------------------------------------------------
1 | # A Dockerfile that sets up a full Gym install with test dependencies
2 | FROM ubuntu:16.04
3 |
4 | # Install keyboard-configuration separately to avoid travis hanging waiting for keyboard selection
5 | RUN \
6 | apt-get -y update && \
7 | apt-get install -y keyboard-configuration && \
8 |
9 | # Maybe Install python3.6 on ubuntu 16.04 ?
10 | # apt-get install -y software-properties-common && \
11 | # add-apt-repository -y ppa:jonathonf/python-3.6 && \
12 | # apt-get -y update && \
13 | # apt-get -y install python3.6 python3.6-distutils python3.6-dev
14 |
15 | apt-get install -y \
16 | python-setuptools \
17 | python-pip \
18 | python3-dev \
19 | python-pyglet \
20 | python3-opengl \
21 | libjpeg-dev \
22 | libboost-all-dev \
23 | libsdl2-dev \
24 | libosmesa6-dev \
25 | patchelf \
26 | xvfb \
27 | ffmpeg \
28 | wget \
29 | unzip && \
30 |
31 | apt-get clean && \
32 | rm -rf /var/lib/apt/lists/* && \
33 | pip install tox && \
34 |
35 | # Download mujoco
36 | mkdir /root/.mujoco && \
37 | cd /root/.mujoco && \
38 | wget https://www.roboti.us/download/mjpro150_linux.zip && \
39 | unzip mjpro150_linux.zip
40 |
41 | ARG MUJOCO_KEY
42 | ENV MUJOCO_KEY=$MUJOCO_KEY
43 | ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mjpro150/bin
44 |
45 | RUN echo $MUJOCO_KEY | base64 --decode > /root/.mujoco/mjkey.txt
46 |
47 | # install dependencies
48 | COPY . /usr/local/gym/
49 | RUN cd /usr/local/gym && \
50 | tox --notest
51 |
52 | WORKDIR /usr/local/gym/
53 | ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"]
54 | CMD ["tox"]
55 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/inverted_double_pendulum.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 |
7 | def __init__(self):
8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5)
9 | utils.EzPickle.__init__(self)
10 |
11 | def step(self, action):
12 | self.do_simulation(action, self.frame_skip)
13 | ob = self._get_obs()
14 | x, _, y = self.sim.data.site_xpos[0]
15 | dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
16 | v1, v2 = self.sim.data.qvel[1:3]
17 | vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
18 | alive_bonus = 10
19 | r = alive_bonus - dist_penalty - vel_penalty
20 | done = bool(y <= 1)
21 | return ob, r, done, {}
22 |
23 | def _get_obs(self):
24 | return np.concatenate([
25 | self.sim.data.qpos[:1], # cart x pos
26 | np.sin(self.sim.data.qpos[1:]), # link angles
27 | np.cos(self.sim.data.qpos[1:]),
28 | np.clip(self.sim.data.qvel, -10, 10),
29 | np.clip(self.sim.data.qfrc_constraint, -10, 10)
30 | ]).ravel()
31 |
32 | def reset_model(self):
33 | self.set_state(
34 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
35 | self.init_qvel + self.np_random.randn(self.model.nv) * .1
36 | )
37 | return self._get_obs()
38 |
39 | def viewer_setup(self):
40 | v = self.viewer
41 | v.cam.trackbodyid = 0
42 | v.cam.distance = self.model.stat.extent * 0.5
43 | v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2]
44 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/pick_and_place.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/mujoco/reacher.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | utils.EzPickle.__init__(self)
8 | mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2)
9 |
10 | def step(self, a):
11 | vec = self.get_body_com("fingertip")-self.get_body_com("target")
12 | reward_dist = - np.linalg.norm(vec)
13 | reward_ctrl = - np.square(a).sum()
14 | reward = reward_dist + reward_ctrl
15 | self.do_simulation(a, self.frame_skip)
16 | ob = self._get_obs()
17 | done = False
18 | return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
19 |
20 | def viewer_setup(self):
21 | self.viewer.cam.trackbodyid = 0
22 |
23 | def reset_model(self):
24 | qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
25 | while True:
26 | self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
27 | if np.linalg.norm(self.goal) < 2:
28 | break
29 | qpos[-2:] = self.goal
30 | qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
31 | qvel[-2:] = 0
32 | self.set_state(qpos, qvel)
33 | return self._get_obs()
34 |
35 | def _get_obs(self):
36 | theta = self.sim.data.qpos.flat[:2]
37 | return np.concatenate([
38 | np.cos(theta),
39 | np.sin(theta),
40 | self.sim.data.qpos.flat[2:],
41 | self.sim.data.qvel.flat[:2],
42 | self.get_body_com("fingertip") - self.get_body_com("target")
43 | ])
44 |
--------------------------------------------------------------------------------
/gym/wrappers/time_limit.py:
--------------------------------------------------------------------------------
1 | import time
2 | from gym import Wrapper, logger
3 |
4 | class TimeLimit(Wrapper):
5 | def __init__(self, env, max_episode_seconds=None, max_episode_steps=None):
6 | super(TimeLimit, self).__init__(env)
7 | self._max_episode_seconds = max_episode_seconds
8 | self._max_episode_steps = max_episode_steps
9 |
10 | self._elapsed_steps = 0
11 | self._episode_started_at = None
12 |
13 | @property
14 | def _elapsed_seconds(self):
15 | return time.time() - self._episode_started_at
16 |
17 | def _past_limit(self):
18 | """Return true if we are past our limit"""
19 | if self._max_episode_steps is not None and self._max_episode_steps <= self._elapsed_steps:
20 | logger.debug("Env has passed the step limit defined by TimeLimit.")
21 | return True
22 |
23 | if self._max_episode_seconds is not None and self._max_episode_seconds <= self._elapsed_seconds:
24 | logger.debug("Env has passed the seconds limit defined by TimeLimit.")
25 | return True
26 |
27 | return False
28 |
29 | def step(self, action):
30 | assert self._episode_started_at is not None, "Cannot call env.step() before calling reset()"
31 | observation, reward, done, info = self.env.step(action)
32 | self._elapsed_steps += 1
33 |
34 | if self._past_limit():
35 | if self.metadata.get('semantics.autoreset'):
36 | _ = self.reset() # automatically reset the env
37 | done = True
38 |
39 | return observation, reward, done, info
40 |
41 | def reset(self):
42 | self._episode_started_at = time.time()
43 | self._elapsed_steps = 0
44 | return self.env.reset()
45 |
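A minimal usage sketch (the 50-step cap is an arbitrary choice): wrapping an environment in `TimeLimit` forces `done=True` once the configured step or time budget is exceeded.

```python
import gym
from gym.wrappers.time_limit import TimeLimit

env = TimeLimit(gym.make('CartPole-v0'), max_episode_steps=50)
ob = env.reset()
for t in range(200):
    ob, reward, done, info = env.step(env.action_space.sample())
    if done:  # either the pole fell or the 50-step limit was reached
        break
env.close()
```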
--------------------------------------------------------------------------------
/gym/envs/toy_text/discrete.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym import Env, spaces
4 | from gym.utils import seeding
5 |
6 | def categorical_sample(prob_n, np_random):
7 | """
8 | Sample from categorical distribution
9 | Each row specifies class probabilities
10 | """
11 | prob_n = np.asarray(prob_n)
12 | csprob_n = np.cumsum(prob_n)
13 | return (csprob_n > np_random.rand()).argmax()
14 |
15 |
16 | class DiscreteEnv(Env):
17 |
18 | """
19 | Has the following members
20 | - nS: number of states
21 | - nA: number of actions
22 | - P: transitions (*)
23 | - isd: initial state distribution (**)
24 |
25 | (*) dictionary dict of dicts of lists, where
26 | P[s][a] == [(probability, nextstate, reward, done), ...]
27 | (**) list or array of length nS
28 |
29 |
30 | """
31 | def __init__(self, nS, nA, P, isd):
32 | self.P = P
33 | self.isd = isd
34 | self.lastaction=None # for rendering
35 | self.nS = nS
36 | self.nA = nA
37 |
38 | self.action_space = spaces.Discrete(self.nA)
39 | self.observation_space = spaces.Discrete(self.nS)
40 |
41 | self.seed()
42 | self.reset()
43 |
44 | def seed(self, seed=None):
45 | self.np_random, seed = seeding.np_random(seed)
46 | return [seed]
47 |
48 | def reset(self):
49 | self.s = categorical_sample(self.isd, self.np_random)
50 | self.lastaction=None
51 | return self.s
52 |
53 | def step(self, a):
54 | transitions = self.P[self.s][a]
55 | i = categorical_sample([t[0] for t in transitions], self.np_random)
56 | p, s, r, d= transitions[i]
57 | self.s = s
58 | self.lastaction=a
59 | return (s, r, d, {"prob" : p})
60 |
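To illustrate the `P` and `isd` structures described in the docstring, here is a hypothetical two-state, one-action chain built directly on `DiscreteEnv`; the transition probabilities and rewards are invented for the example.

```python
from gym.envs.toy_text import discrete

# P[s][a] is a list of (probability, nextstate, reward, done) tuples.
P = {
    0: {0: [(0.9, 0, 0.0, False), (0.1, 1, 1.0, True)]},
    1: {0: [(1.0, 1, 0.0, True)]},
}
isd = [1.0, 0.0]  # always start in state 0

env = discrete.DiscreteEnv(nS=2, nA=1, P=P, isd=isd)
s = env.reset()                 # -> 0
s, r, done, info = env.step(0)  # info carries {"prob": ...} for the sampled transition
```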
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | import sys, os.path
3 |
4 | # Don't import gym module here, since deps may not be installed
5 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'gym'))
6 | from version import VERSION
7 |
8 | # Environment-specific dependencies.
9 | extras = {
10 | 'atari': ['atari_py>=0.1.4', 'Pillow', 'PyOpenGL'],
11 | 'box2d': ['box2d-py>=2.3.5'],
12 | 'classic_control': ['PyOpenGL'],
13 | 'mujoco': ['mujoco_py>=1.50', 'imageio'],
14 | 'robotics': ['mujoco_py>=1.50', 'imageio'],
15 | }
16 |
17 | # Meta dependency groups.
18 | all_deps = []
19 | for group_name in extras:
20 | all_deps += extras[group_name]
21 | extras['all'] = all_deps
22 |
23 | setup(name='gym',
24 | version=VERSION,
25 | description='The OpenAI Gym: A toolkit for developing and comparing your reinforcement learning agents.',
26 | url='https://github.com/openai/gym',
27 | author='OpenAI',
28 | author_email='gym@openai.com',
29 | license='',
30 | packages=[package for package in find_packages()
31 | if package.startswith('gym')],
32 | zip_safe=False,
33 | install_requires=[
34 | 'scipy', 'numpy>=1.10.4', 'requests>=2.0', 'six', 'pyglet>=1.2.0',
35 | ],
36 | extras_require=extras,
37 | package_data={'gym': [
38 | 'envs/mujoco/assets/*.xml',
39 | 'envs/classic_control/assets/*.png',
40 | 'envs/robotics/assets/LICENSE.md',
41 | 'envs/robotics/assets/fetch/*.xml',
42 | 'envs/robotics/assets/hand/*.xml',
43 | 'envs/robotics/assets/stls/fetch/*.stl',
44 | 'envs/robotics/assets/stls/hand/*.stl',
45 | 'envs/robotics/assets/textures/*.png']
46 | },
47 | tests_require=['pytest', 'mock'],
48 | )
49 |
--------------------------------------------------------------------------------
/gym/wrappers/monitoring/tests/test_video_recorder.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import shutil
4 | import tempfile
5 | import numpy as np
6 |
7 | import gym
8 | from gym.wrappers.monitoring.video_recorder import VideoRecorder
9 |
10 | class BrokenRecordableEnv(object):
11 | metadata = {'render.modes': [None, 'rgb_array']}
12 |
13 | def render(self, mode=None):
14 | pass
15 |
16 | class UnrecordableEnv(object):
17 | metadata = {'render.modes': [None]}
18 |
19 | def render(self, mode=None):
20 | pass
21 |
22 | def test_record_simple():
23 | env = gym.make("CartPole-v1")
24 | rec = VideoRecorder(env)
25 | env.reset()
26 | rec.capture_frame()
27 | rec.close()
28 | assert not rec.empty
29 | assert not rec.broken
30 | assert os.path.exists(rec.path)
31 | f = open(rec.path)
32 | assert os.fstat(f.fileno()).st_size > 100
33 |
34 | def test_no_frames():
35 | env = BrokenRecordableEnv()
36 | rec = VideoRecorder(env)
37 | rec.close()
38 | assert rec.empty
39 | assert rec.functional
40 | assert not os.path.exists(rec.path)
41 |
42 | def test_record_unrecordable_method():
43 | env = UnrecordableEnv()
44 | rec = VideoRecorder(env)
45 | assert not rec.enabled
46 | rec.close()
47 |
48 | def test_record_breaking_render_method():
49 | env = BrokenRecordableEnv()
50 | rec = VideoRecorder(env)
51 | rec.capture_frame()
52 | rec.close()
53 | assert rec.empty
54 | assert rec.broken
55 | assert not os.path.exists(rec.path)
56 |
57 | def test_text_envs():
58 | env = gym.make('FrozenLake-v0')
59 | video = VideoRecorder(env)
60 | try:
61 | env.reset()
62 | video.capture_frame()
63 | video.close()
64 | finally:
65 | os.remove(video.path)
66 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/ant.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
8 | utils.EzPickle.__init__(self)
9 |
10 | def step(self, a):
11 | xposbefore = self.get_body_com("torso")[0]
12 | self.do_simulation(a, self.frame_skip)
13 | xposafter = self.get_body_com("torso")[0]
14 | forward_reward = (xposafter - xposbefore)/self.dt
15 | ctrl_cost = .5 * np.square(a).sum()
16 | contact_cost = 0.5 * 1e-3 * np.sum(
17 | np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
18 | survive_reward = 1.0
19 | reward = forward_reward - ctrl_cost - contact_cost + survive_reward
20 | state = self.state_vector()
21 | notdone = np.isfinite(state).all() \
22 | and state[2] >= 0.2 and state[2] <= 1.0
23 | done = not notdone
24 | ob = self._get_obs()
25 | return ob, reward, done, dict(
26 | reward_forward=forward_reward,
27 | reward_ctrl=-ctrl_cost,
28 | reward_contact=-contact_cost,
29 | reward_survive=survive_reward)
30 |
31 | def _get_obs(self):
32 | return np.concatenate([
33 | self.sim.data.qpos.flat[2:],
34 | self.sim.data.qvel.flat,
35 | np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
36 | ])
37 |
38 | def reset_model(self):
39 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1)
40 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
41 | self.set_state(qpos, qvel)
42 | return self._get_obs()
43 |
44 | def viewer_setup(self):
45 | self.viewer.cam.distance = self.model.stat.extent * 0.5
46 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/reach.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/point.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/mujoco/humanoidstandup.py:
--------------------------------------------------------------------------------
1 | from gym.envs.mujoco import mujoco_env
2 | from gym import utils
3 | import numpy as np
4 |
5 | class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
8 | utils.EzPickle.__init__(self)
9 |
10 | def _get_obs(self):
11 | data = self.sim.data
12 | return np.concatenate([data.qpos.flat[2:],
13 | data.qvel.flat,
14 | data.cinert.flat,
15 | data.cvel.flat,
16 | data.qfrc_actuator.flat,
17 | data.cfrc_ext.flat])
18 |
19 | def step(self, a):
20 | self.do_simulation(a, self.frame_skip)
21 | pos_after = self.sim.data.qpos[2]
22 | data = self.sim.data
23 | uph_cost = (pos_after - 0) / self.model.opt.timestep
24 |
25 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
26 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
27 | quad_impact_cost = min(quad_impact_cost, 10)
28 | reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
29 |
30 | done = bool(False)
31 | return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost)
32 |
33 | def reset_model(self):
34 | c = 0.01
35 | self.set_state(
36 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
37 | self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
38 | )
39 | return self._get_obs()
40 |
41 | def viewer_setup(self):
42 | self.viewer.cam.trackbodyid = 1
43 | self.viewer.cam.distance = self.model.stat.extent * 1.0
44 | self.viewer.cam.lookat[2] = 0.8925
45 | self.viewer.cam.elevation = -20
46 |
--------------------------------------------------------------------------------
/examples/agents/random_agent.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 |
4 | import gym
5 | from gym import wrappers, logger
6 |
7 | class RandomAgent(object):
8 | """The world's simplest agent!"""
9 | def __init__(self, action_space):
10 | self.action_space = action_space
11 |
12 | def act(self, observation, reward, done):
13 | return self.action_space.sample()
14 |
15 | if __name__ == '__main__':
16 | parser = argparse.ArgumentParser(description=None)
17 | parser.add_argument('env_id', nargs='?', default='CartPole-v0', help='Select the environment to run')
18 | args = parser.parse_args()
19 |
20 | # You can set the level to logger.DEBUG or logger.WARN if you
21 | # want to change the amount of output.
22 | logger.set_level(logger.INFO)
23 |
24 | env = gym.make(args.env_id)
25 |
26 | # You provide the directory to write to (can be an existing
27 | # directory, including one with existing data -- all monitor files
28 | # will be namespaced). You can also dump to a tempdir if you'd
29 | # like: tempfile.mkdtemp().
30 | outdir = '/tmp/random-agent-results'
31 | env = wrappers.Monitor(env, directory=outdir, force=True)
32 | env.seed(0)
33 | agent = RandomAgent(env.action_space)
34 |
35 | episode_count = 100
36 | reward = 0
37 | done = False
38 |
39 | for i in range(episode_count):
40 | ob = env.reset()
41 | while True:
42 | action = agent.act(ob, reward, done)
43 | ob, reward, done, _ = env.step(action)
44 | if done:
45 | break
46 | # Note there's no env.render() here. But the environment still can open window and
47 | # render if asked by env.monitor: it calls env.render('rgb_array') to record video.
48 | # Video is not recorded every episode, see capped_cubic_video_schedule for details.
49 |
50 | # Close the env and write monitor result info to disk
51 | env.close()
52 |
--------------------------------------------------------------------------------
/gym/envs/tests/test_envs.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from gym import envs
5 | from gym.envs.tests.spec_list import spec_list
6 |
7 | # This runs a smoke test on each officially registered env. We may also
8 | # want to try running environments which are not officially
9 | # registered.
10 | @pytest.mark.parametrize("spec", spec_list)
11 | def test_env(spec):
12 | # Capture warnings
13 | with pytest.warns(None) as warnings:
14 | env = spec.make()
15 |
16 | # Check that dtype is explicitly declared for gym.Box spaces
17 | for warning_msg in warnings:
18 | assert not 'autodetected dtype' in str(warning_msg.message)
19 |
20 | ob_space = env.observation_space
21 | act_space = env.action_space
22 | ob = env.reset()
23 | assert ob_space.contains(ob), 'Reset observation: {!r} not in space'.format(ob)
24 | a = act_space.sample()
25 | observation, reward, done, _info = env.step(a)
26 | assert ob_space.contains(observation), 'Step observation: {!r} not in space'.format(observation)
27 | assert np.isscalar(reward), "{} is not a scalar for {}".format(reward, env)
28 | assert isinstance(done, bool), "Expected {} to be a boolean".format(done)
29 |
30 | for mode in env.metadata.get('render.modes', []):
31 | env.render(mode=mode)
32 |
33 | # Make sure we can render the environment after close.
34 | for mode in env.metadata.get('render.modes', []):
35 | env.render(mode=mode)
36 |
37 | env.close()
38 |
39 | # Run a longer rollout on some environments
40 | def test_random_rollout():
41 | for env in [envs.make('CartPole-v0'), envs.make('FrozenLake-v0')]:
42 | agent = lambda ob: env.action_space.sample()
43 | ob = env.reset()
44 | for _ in range(10):
45 | assert env.observation_space.contains(ob)
46 | a = agent(ob)
47 | assert env.action_space.contains(a)
48 | (ob, _reward, done, _info) = env.step(a)
49 | if done: break
50 | env.close()
51 |
--------------------------------------------------------------------------------
/gym/spaces/box.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import gym
4 | from gym import logger
5 |
6 | class Box(gym.Space):
7 | """
8 | A box in R^n.
9 | I.e., each coordinate is bounded.
10 |
11 | Example usage:
12 | self.action_space = spaces.Box(low=-10, high=10, shape=(1,))
13 | """
14 | def __init__(self, low=None, high=None, shape=None, dtype=None):
15 | """
16 | Two kinds of valid input:
17 | Box(low=-1.0, high=1.0, shape=(3,4)) # low and high are scalars, and shape is provided
18 | Box(low=np.array([-1.0,-2.0]), high=np.array([2.0,4.0])) # low and high are arrays of the same shape
19 | """
20 | if shape is None:
21 | assert low.shape == high.shape
22 | shape = low.shape
23 | else:
24 | assert np.isscalar(low) and np.isscalar(high)
25 | low = low + np.zeros(shape)
26 | high = high + np.zeros(shape)
27 | if dtype is None: # Autodetect type
28 | if (high == 255).all():
29 | dtype = np.uint8
30 | else:
31 | dtype = np.float32
32 | logger.warn("gym.spaces.Box autodetected dtype as {}. Please provide explicit dtype.".format(dtype))
33 | self.low = low.astype(dtype)
34 | self.high = high.astype(dtype)
35 | gym.Space.__init__(self, shape, dtype)
36 |
37 | def sample(self):
38 | return gym.spaces.np_random.uniform(low=self.low, high=self.high + (0 if self.dtype.kind == 'f' else 1), size=self.low.shape).astype(self.dtype)
39 |
40 | def contains(self, x):
41 | return x.shape == self.shape and (x >= self.low).all() and (x <= self.high).all()
42 |
43 | def to_jsonable(self, sample_n):
44 | return np.array(sample_n).tolist()
45 |
46 | def from_jsonable(self, sample_n):
47 | return [np.asarray(sample) for sample in sample_n]
48 |
49 | def __repr__(self):
50 | return "Box" + str(self.shape)
51 |
52 | def __eq__(self, other):
53 | return np.allclose(self.low, other.low) and np.allclose(self.high, other.high)
54 |
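A short usage sketch of the scalar low/high plus shape construction from the docstring (not part of the file): passing an explicit `dtype` avoids the "autodetected dtype" warning emitted above.

```python
import numpy as np
from gym import spaces

space = spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)
x = space.sample()        # float32 vector with every coordinate in [-1, 1]
assert space.contains(x)
```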
--------------------------------------------------------------------------------
/gym/envs/robotics/hand_env.py:
--------------------------------------------------------------------------------
1 | import os
2 | import copy
3 | import numpy as np
4 |
5 | import gym
6 | from gym import error, spaces
7 | from gym.utils import seeding
8 | from gym.envs.robotics import robot_env
9 |
10 |
11 | class HandEnv(robot_env.RobotEnv):
12 | def __init__(self, model_path, n_substeps, initial_qpos, relative_control):
13 | self.relative_control = relative_control
14 |
15 | super(HandEnv, self).__init__(
16 | model_path=model_path, n_substeps=n_substeps, n_actions=20,
17 | initial_qpos=initial_qpos)
18 |
19 | # RobotEnv methods
20 | # ----------------------------
21 |
22 | def _set_action(self, action):
23 | assert action.shape == (20,)
24 |
25 | ctrlrange = self.sim.model.actuator_ctrlrange
26 | actuation_range = (ctrlrange[:, 1] - ctrlrange[:, 0]) / 2.
27 | if self.relative_control:
28 | actuation_center = np.zeros_like(action)
29 | for i in range(self.sim.data.ctrl.shape[0]):
30 | actuation_center[i] = self.sim.data.get_joint_qpos(
31 | self.sim.model.actuator_names[i].replace(':A_', ':'))
32 | for joint_name in ['FF', 'MF', 'RF', 'LF']:
33 | act_idx = self.sim.model.actuator_name2id(
34 | 'robot0:A_{}J1'.format(joint_name))
35 | actuation_center[act_idx] += self.sim.data.get_joint_qpos(
36 | 'robot0:{}J0'.format(joint_name))
37 | else:
38 | actuation_center = (ctrlrange[:, 1] + ctrlrange[:, 0]) / 2.
39 | self.sim.data.ctrl[:] = actuation_center + action * actuation_range
40 | self.sim.data.ctrl[:] = np.clip(self.sim.data.ctrl, ctrlrange[:, 0], ctrlrange[:, 1])
41 |
42 | def _viewer_setup(self):
43 | body_id = self.sim.model.body_name2id('robot0:palm')
44 | lookat = self.sim.data.body_xpos[body_id]
45 | for idx, value in enumerate(lookat):
46 | self.viewer.cam.lookat[idx] = value
47 | self.viewer.cam.distance = 0.5
48 | self.viewer.cam.azimuth = 55.
49 | self.viewer.cam.elevation = -25.
50 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/inverted_double_pendulum.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/mujoco/pusher.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | import mujoco_py
6 |
7 | class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
8 | def __init__(self):
9 | utils.EzPickle.__init__(self)
10 | mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', 5)
11 |
12 | def step(self, a):
13 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
14 | vec_2 = self.get_body_com("object") - self.get_body_com("goal")
15 |
16 | reward_near = - np.linalg.norm(vec_1)
17 | reward_dist = - np.linalg.norm(vec_2)
18 | reward_ctrl = - np.square(a).sum()
19 | reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
20 |
21 | self.do_simulation(a, self.frame_skip)
22 | ob = self._get_obs()
23 | done = False
24 | return ob, reward, done, dict(reward_dist=reward_dist,
25 | reward_ctrl=reward_ctrl)
26 |
27 | def viewer_setup(self):
28 | self.viewer.cam.trackbodyid = -1
29 | self.viewer.cam.distance = 4.0
30 |
31 | def reset_model(self):
32 | qpos = self.init_qpos
33 |
34 | self.goal_pos = np.asarray([0, 0])
35 | while True:
36 | self.cylinder_pos = np.concatenate([
37 | self.np_random.uniform(low=-0.3, high=0, size=1),
38 | self.np_random.uniform(low=-0.2, high=0.2, size=1)])
39 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
40 | break
41 |
42 | qpos[-4:-2] = self.cylinder_pos
43 | qpos[-2:] = self.goal_pos
44 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
45 | high=0.005, size=self.model.nv)
46 | qvel[-4:] = 0
47 | self.set_state(qpos, qvel)
48 | return self._get_obs()
49 |
50 | def _get_obs(self):
51 | return np.concatenate([
52 | self.sim.data.qpos.flat[:7],
53 | self.sim.data.qvel.flat[:7],
54 | self.get_body_com("tips_arm"),
55 | self.get_body_com("object"),
56 | self.get_body_com("goal"),
57 | ])
58 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/shared_asset.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/utils/atomic_write.py:
--------------------------------------------------------------------------------
1 | # Based on http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python
2 |
3 | import os
4 | from contextlib import contextmanager
5 |
6 | # We would ideally atomically replace any existing file with the new
7 | # version. However, on Windows there's no Python-only solution prior
8 | # to Python 3.3. (This library includes a C extension to do so:
9 | # https://pypi.python.org/pypi/pyosreplace/0.1.)
10 | #
11 | # Correspondingly, we make a best effort, but on Python < 3.3 use a
12 | # replace method which could result in the file temporarily
13 | # disappearing.
14 | import sys
15 | if sys.version_info >= (3, 3):
16 | # Python 3.3 and up have a native `replace` method
17 | from os import replace
18 | elif sys.platform.startswith("win"):
19 | def replace(src, dst):
20 | # TODO: on Windows, this will raise if the file is in use,
21 | # which is possible. We'll need to make this more robust over
22 | # time.
23 | try:
24 | os.remove(dst)
25 | except OSError:
26 | pass
27 | os.rename(src, dst)
28 | else:
29 | # POSIX rename() is always atomic
30 | from os import rename as replace
31 |
32 | @contextmanager
33 | def atomic_write(filepath, binary=False, fsync=False):
34 | """ Writeable file object that atomically updates a file (using a temporary file). In some cases (namely Python < 3.3 on Windows), this could result in an existing file being temporarily unlinked.
35 |
36 | :param filepath: the file path to be opened
37 | :param binary: whether to open the file in a binary mode instead of textual
38 | :param fsync: whether to force write the file to disk
39 | """
40 |
41 | tmppath = filepath + '~'
42 | while os.path.isfile(tmppath):
43 | tmppath += '~'
44 | try:
45 | with open(tmppath, 'wb' if binary else 'w') as file:
46 | yield file
47 | if fsync:
48 | file.flush()
49 | os.fsync(file.fileno())
50 | replace(tmppath, filepath)
51 | finally:
52 | try:
53 | os.remove(tmppath)
54 | except (IOError, OSError):
55 | pass
56 |
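A brief usage sketch of the context manager defined above (the path and contents are arbitrary): the target file is only replaced once the block exits successfully, so readers never observe a partially written file.

```python
from gym.utils.atomic_write import atomic_write

with atomic_write('/tmp/results.json') as f:
    f.write('{"episodes": 100}')
```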
--------------------------------------------------------------------------------
/gym/utils/closer.py:
--------------------------------------------------------------------------------
1 | import atexit
2 | import threading
3 | import weakref
4 |
5 | class Closer(object):
6 | """A registry that ensures your objects get closed, whether manually,
7 | upon garbage collection, or upon exit. To work properly, your
8 | objects need to cooperate and do something like the following:
9 |
10 | ```
11 | closer = Closer()
12 | class Example(object):
13 | def __init__(self):
14 | self._id = closer.register(self)
15 |
16 | def close(self):
17 | # Probably worth making idempotent too!
18 | ...
19 | closer.unregister(self._id)
20 |
21 | def __del__(self):
22 | self.close()
23 | ```
24 |
25 | That is, your objects should:
26 |
27 | - register() themselves and save the returned ID
28 | - unregister() themselves upon close()
29 | - include a __del__ method which close()'s the object
30 | """
31 |
32 | def __init__(self, atexit_register=True):
33 | self.lock = threading.Lock()
34 | self.next_id = -1
35 | self.closeables = weakref.WeakValueDictionary()
36 |
37 | if atexit_register:
38 | atexit.register(self.close)
39 |
40 | def generate_next_id(self):
41 | with self.lock:
42 | self.next_id += 1
43 | return self.next_id
44 |
45 | def register(self, closeable):
46 | """Registers an object with a 'close' method.
47 |
48 | Returns:
49 | int: The registration ID of this object. It is the caller's responsibility to save this ID if early closing is desired.
50 | """
51 | assert hasattr(closeable, 'close'), 'No close method for {}'.format(closeable)
52 |
53 | next_id = self.generate_next_id()
54 | self.closeables[next_id] = closeable
55 | return next_id
56 |
57 | def unregister(self, id):
58 | assert id is not None
59 | if id in self.closeables:
60 | del self.closeables[id]
61 |
62 | def close(self):
63 | # Explicitly fetch all monitors first so that they can't disappear while
64 | # we iterate. cf. http://stackoverflow.com/a/12429620
65 | closeables = list(self.closeables.values())
66 | for closeable in closeables:
67 | closeable.close()
68 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/humanoid.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym.envs.mujoco import mujoco_env
3 | from gym import utils
4 |
5 | def mass_center(model, sim):
6 | mass = np.expand_dims(model.body_mass, 1)
7 | xpos = sim.data.xipos
8 | return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
9 |
10 | class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
11 | def __init__(self):
12 | mujoco_env.MujocoEnv.__init__(self, 'humanoid.xml', 5)
13 | utils.EzPickle.__init__(self)
14 |
15 | def _get_obs(self):
16 | data = self.sim.data
17 | return np.concatenate([data.qpos.flat[2:],
18 | data.qvel.flat,
19 | data.cinert.flat,
20 | data.cvel.flat,
21 | data.qfrc_actuator.flat,
22 | data.cfrc_ext.flat])
23 |
24 | def step(self, a):
25 | pos_before = mass_center(self.model, self.sim)
26 | self.do_simulation(a, self.frame_skip)
27 | pos_after = mass_center(self.model, self.sim)
28 | alive_bonus = 5.0
29 | data = self.sim.data
30 | lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
31 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
32 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
33 | quad_impact_cost = min(quad_impact_cost, 10)
34 | reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
35 | qpos = self.sim.data.qpos
36 | done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
37 | return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
38 |
39 | def reset_model(self):
40 | c = 0.01
41 | self.set_state(
42 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
43 | self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
44 | )
45 | return self._get_obs()
46 |
47 | def viewer_setup(self):
48 | self.viewer.cam.trackbodyid = 1
49 | self.viewer.cam.distance = self.model.stat.extent * 1.0
50 | self.viewer.cam.lookat[2] = 2.0
51 | self.viewer.cam.elevation = -20
52 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/thrower.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | utils.EzPickle.__init__(self)
8 | self._ball_hit_ground = False
9 | self._ball_hit_location = None
10 | mujoco_env.MujocoEnv.__init__(self, 'thrower.xml', 5)
11 |
12 | def step(self, a):
13 | ball_xy = self.get_body_com("ball")[:2]
14 | goal_xy = self.get_body_com("goal")[:2]
15 |
16 | if not self._ball_hit_ground and self.get_body_com("ball")[2] < -0.25:
17 | self._ball_hit_ground = True
18 | self._ball_hit_location = self.get_body_com("ball")
19 |
20 | if self._ball_hit_ground:
21 | ball_hit_xy = self._ball_hit_location[:2]
22 | reward_dist = -np.linalg.norm(ball_hit_xy - goal_xy)
23 | else:
24 | reward_dist = -np.linalg.norm(ball_xy - goal_xy)
25 | reward_ctrl = - np.square(a).sum()
26 |
27 | reward = reward_dist + 0.002 * reward_ctrl
28 | self.do_simulation(a, self.frame_skip)
29 | ob = self._get_obs()
30 | done = False
31 | return ob, reward, done, dict(reward_dist=reward_dist,
32 | reward_ctrl=reward_ctrl)
33 |
34 | def viewer_setup(self):
35 | self.viewer.cam.trackbodyid = 0
36 | self.viewer.cam.distance = 4.0
37 |
38 | def reset_model(self):
39 | self._ball_hit_ground = False
40 | self._ball_hit_location = None
41 |
42 | qpos = self.init_qpos
43 | self.goal = np.array([self.np_random.uniform(low=-0.3, high=0.3),
44 | self.np_random.uniform(low=-0.3, high=0.3)])
45 |
46 | qpos[-9:-7] = self.goal
47 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
48 | high=0.005, size=self.model.nv)
49 | qvel[7:] = 0
50 | self.set_state(qpos, qvel)
51 | return self._get_obs()
52 |
53 | def _get_obs(self):
54 | return np.concatenate([
55 | self.sim.data.qpos.flat[:7],
56 | self.sim.data.qvel.flat[:7],
57 | self.get_body_com("r_wrist_roll_link"),
58 | self.get_body_com("ball"),
59 | self.get_body_com("goal"),
60 | ])
61 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/manipulate_pen.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/swimmer.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/toy_text/nchain.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import spaces
3 | from gym.utils import seeding
4 |
5 | class NChainEnv(gym.Env):
6 | """n-Chain environment
7 |
8 | This game presents moves along a linear chain of states, with two actions:
9 | 0) forward, which moves along the chain but returns no reward
10 | 1) backward, which returns to the beginning and has a small reward
11 |
12 | The end of the chain, however, presents a large reward, and by moving
13 | 'forward' at the end of the chain this large reward can be repeated.
14 |
15 | At each action, there is a small probability that the agent 'slips' and the
16 | opposite transition is instead taken.
17 |
18 | The observed state is the current state in the chain (0 to n-1).
19 |
20 | This environment is described in section 6.1 of:
21 | A Bayesian Framework for Reinforcement Learning by Malcolm Strens (2000)
22 | http://ceit.aut.ac.ir/~shiry/lecture/machine-learning/papers/BRL-2000.pdf
23 | """
24 | def __init__(self, n=5, slip=0.2, small=2, large=10):
25 | self.n = n
26 | self.slip = slip # probability of 'slipping' an action
27 | self.small = small # payout for 'backwards' action
28 | self.large = large # payout at end of chain for 'forwards' action
29 | self.state = 0 # Start at beginning of the chain
30 | self.action_space = spaces.Discrete(2)
31 | self.observation_space = spaces.Discrete(self.n)
32 | self.seed()
33 |
34 | def seed(self, seed=None):
35 | self.np_random, seed = seeding.np_random(seed)
36 | return [seed]
37 |
38 | def step(self, action):
39 | assert self.action_space.contains(action)
40 | if self.np_random.rand() < self.slip:
41 | action = not action # agent slipped, reverse action taken
42 | if action: # 'backwards': go back to the beginning, get small reward
43 | reward = self.small
44 | self.state = 0
45 | elif self.state < self.n - 1: # 'forwards': go up along the chain
46 | reward = 0
47 | self.state += 1
48 | else: # 'forwards': stay at the end of the chain, collect large reward
49 | reward = self.large
50 | done = False
51 | return self.state, reward, done, {}
52 |
53 | def reset(self):
54 | self.state = 0
55 | return self.state
56 |
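A minimal usage sketch of the environment above, driving it with a random policy (illustrative only; the constructor arguments are the defaults shown in `__init__`):

```python
# Minimal, illustrative usage of NChainEnv with a random policy.
from gym.envs.toy_text.nchain import NChainEnv

env = NChainEnv(n=5, slip=0.2, small=2, large=10)
env.seed(0)
state = env.reset()
total_reward = 0
for _ in range(100):
    action = env.action_space.sample()  # 0: forward, 1: backward
    state, reward, done, _ = env.step(action)
    total_reward += reward
print("return over 100 random steps:", total_reward)
```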
--------------------------------------------------------------------------------
/gym/envs/toy_text/hotter_colder.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import gym
4 | from gym import spaces
5 | from gym.utils import seeding
6 |
7 |
8 | class HotterColder(gym.Env):
9 | """Hotter Colder
10 | The goal of hotter colder is to guess closer to a randomly selected number
11 |
12 | After each step the agent receives an observation of:
13 | 0 - No guess yet submitted (only after reset)
14 | 1 - Guess is lower than the target
15 | 2 - Guess is equal to the target
16 | 3 - Guess is higher than the target
17 |
18 | The reward is calculated as:
19 | ((min(action, self.number) + self.bounds) / (max(action, self.number) + self.bounds)) ** 2
20 |
21 | Ideally an agent will be able to recognise the 'scent' of a higher reward and
22 | increase the rate at which it guesses in that direction until the reward reaches
23 | its maximum
24 | """
25 | def __init__(self):
26 | self.range = 1000 # +/- value the randomly selected number can be between
27 | self.bounds = 2000 # Action space bounds
28 |
29 | self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
30 | dtype=np.float32)
31 | self.observation_space = spaces.Discrete(4)
32 |
33 | self.number = 0
34 | self.guess_count = 0
35 | self.guess_max = 200
36 | self.observation = 0
37 |
38 | self.seed()
39 | self.reset()
40 |
41 | def seed(self, seed=None):
42 | self.np_random, seed = seeding.np_random(seed)
43 | return [seed]
44 |
45 | def step(self, action):
46 | assert self.action_space.contains(action)
47 |
48 | if action < self.number:
49 | self.observation = 1
50 |
51 | elif action == self.number:
52 | self.observation = 2
53 |
54 | elif action > self.number:
55 | self.observation = 3
56 |
57 | reward = ((min(action, self.number) + self.bounds) / (max(action, self.number) + self.bounds)) ** 2
58 |
59 | self.guess_count += 1
60 | done = self.guess_count >= self.guess_max
61 |
62 | return self.observation, reward[0], done, {"number": self.number, "guesses": self.guess_count}
63 |
64 | def reset(self):
65 | self.number = self.np_random.uniform(-self.range, self.range)
66 | self.guess_count = 0
67 | self.observation = 0
68 | return self.observation
69 |
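To make the reward shaping above concrete, a small worked example (the numbers are chosen purely for illustration):

```python
# Worked example of the HotterColder reward, mirroring the formula in step().
number, bounds = 500.0, 2000.0

def reward(guess):
    return ((min(guess, number) + bounds) / (max(guess, number) + bounds)) ** 2

print(reward(250.0))  # (2250 / 2500) ** 2 = 0.81
print(reward(499.0))  # ~0.9992; the reward approaches 1.0 as the guess approaches the target
```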
--------------------------------------------------------------------------------
/docs/environments.md:
--------------------------------------------------------------------------------
1 | # Environments
2 |
3 | The gym comes prepackaged with many environments. It's this common API around many environments that makes the gym so great. Here we list additional environments that do not come prepackaged with the gym. Submit another to this list via a pull request.
4 |
5 | _**NOTICE**: It's possible that in time OpenAI will develop a full-fledged repository of supplemental environments. Until then this bit of markdown will suffice._
6 |
7 | ## PGE: Parallel Game Engine
8 |
9 | PGE is a FOSS 3D engine for AI simulations that can interoperate with the Gym. It contains environments with modern 3D graphics and uses Bullet for physics.
10 |
11 | Learn more here: https://github.com/222464/PGE
12 |
13 | ## gym-inventory: Inventory Control Environments
14 |
15 | gym-inventory is a single agent domain featuring discrete state and action spaces that an AI agent might encounter in inventory control problems.
16 |
17 | Learn more here: https://github.com/paulhendricks/gym-inventory
18 |
19 | ## gym-gazebo: training Robots in Gazebo
20 |
21 | gym-gazebo presents an extension of the initial OpenAI gym for robotics using ROS and Gazebo, an advanced 3D modeling and
22 | rendering tool.
23 |
24 | Learn more here: https://github.com/erlerobot/gym-gazebo/
25 |
26 | ## gym-maze: 2D maze environment
27 | A simple 2D maze environment where an agent finds its way from the start position to the goal.
28 |
29 | Learn more here: https://github.com/tuzzer/gym-maze/
30 |
31 | ## gym-minigrid: Minimalistic Gridworld Environment
32 |
33 | A minimalistic gridworld environment. Seeks to minimize software dependencies, be easy to extend and deliver good performance for faster training.
34 |
35 | Learn more here: https://github.com/maximecb/gym-minigrid
36 |
37 | ## gym-sokoban: 2D Transportation Puzzles
38 |
39 | The environment consists of transportation puzzles in which the player's goal is to push all boxes onto the warehouse's storage locations.
40 | The advantage of the environment is that it generates a new random level every time it is initialized or reset, which prevents overfitting to predefined levels.
41 |
42 | Learn more here: https://github.com/mpSchrader/gym-sokoban
43 |
44 | ## gym-duckietown: Lane-Following Simulator for Duckietown
45 |
46 | A lane-following simulator built for the [Duckietown](http://duckietown.org/) project (small-scale self-driving car course).
47 |
48 | Learn more here: https://github.com/duckietown/gym-duckietown
49 |
--------------------------------------------------------------------------------
/examples/scripts/sim_env:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import gym
3 | from gym import spaces, envs
4 | import argparse
5 | import numpy as np
6 | import itertools
7 | import time
8 |
9 | parser = argparse.ArgumentParser()
10 | parser.add_argument("env")
11 | parser.add_argument("--mode", choices=["noop", "random", "static", "human"],
12 | default="random")
13 | parser.add_argument("--max_steps", type=int, default=0)
14 | parser.add_argument("--fps",type=float)
15 | parser.add_argument("--once", action="store_true")
16 | parser.add_argument("--ignore_done", action="store_true")
17 | args = parser.parse_args()
18 |
19 | env = envs.make(args.env)
20 | ac_space = env.action_space
21 |
22 | fps = args.fps or env.metadata.get('video.frames_per_second') or 100
23 | if args.max_steps == 0: args.max_steps = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']
24 |
25 | while True:
26 | env.reset()
27 | env.render(mode='human')
28 | print("Starting a new trajectory")
29 | for t in range(args.max_steps) if args.max_steps else itertools.count():
30 | done = False
31 | if args.mode == "noop":
32 | if isinstance(ac_space, spaces.Box):
33 | a = np.zeros(ac_space.shape)
34 | elif isinstance(ac_space, spaces.Discrete):
35 | a = 0
36 | else:
37 | raise NotImplementedError("noop not implemented for class {}".format(type(ac_space)))
38 | _, _, done, _ = env.step(a)
39 | time.sleep(1.0/fps)
40 | elif args.mode == "random":
41 | a = ac_space.sample()
42 | _, _, done, _ = env.step(a)
43 | time.sleep(1.0/fps)
44 | elif args.mode == "static":
45 | time.sleep(1.0/fps)
46 | elif args.mode == "human":
47 | a = raw_input("type action from {0,...,%i} and press enter: "%(ac_space.n-1))
48 | try:
49 | a = int(a)
50 | except ValueError:
51 | print("WARNING: ignoring illegal action '{}'.".format(a))
52 | a = 0
53 | if a >= ac_space.n:
54 | print("WARNING: ignoring illegal action {}.".format(a))
55 | a = 0
56 | _, _, done, _ = env.step(a)
57 |
58 | env.render()
59 | if done and not args.ignore_done: break
60 | print("Done after {} steps".format(t+1))
61 | if args.once:
62 | break
63 | else:
64 | raw_input("Press enter to continue")
65 |
--------------------------------------------------------------------------------
/docs/agents.md:
--------------------------------------------------------------------------------
1 | # Agents
2 |
3 | An "agent" describes the method of running an RL algorithm against an environment in the gym. The agent may contain the algorithm itself or simply provide an integration between an algorithm and the gym environments.
4 |
5 | ## RandomAgent
6 |
7 | A sample agent located in this repo at `gym/examples/agents/random_agent.py`. This simple agent leverages the environment's ability to produce a random valid action and does so for each step.
8 |
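A minimal sketch of the same idea, inlined here for reference (the environment id is chosen only for illustration):

```python
import gym

env = gym.make('CartPole-v0')
observation = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # a random valid action, as RandomAgent does
    observation, reward, done, info = env.step(action)
env.close()
```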
9 | ## cem.py
10 |
11 | A generic Cross-Entropy agent located in this repo at `gym/examples/agents/cem.py`. This agent defaults to 10 iterations of 25 episodes considering the top 20% "elite".
12 |
13 | ## dqn
14 |
15 | This is a very basic DQN (with experience replay) implementation, which uses OpenAI's gym environment and Keras/Theano neural networks. [/sherjilozair/dqn](https://github.com/sherjilozair/dqn)
16 |
17 | ## Simple DQN
18 |
19 | Simple, fast and easy to extend DQN implementation using [Neon](https://github.com/NervanaSystems/neon) deep learning library. Comes with out-of-box tools to train, test and visualize models. For details see [this blog post](https://www.nervanasys.com/deep-reinforcement-learning-with-neon/) or check out the [repo](https://github.com/tambetm/simple_dqn).
20 |
21 | ## AgentNet
22 | A library that allows you to develop custom deep/convolutional/recurrent reinforcement learning agents with full integration with Theano/Lasagne. It also contains a toolkit for various reinforcement learning algorithms, policies, memory augmentations, etc.
23 |
24 | - The repo's here: [AgentNet](https://github.com/yandexdataschool/AgentNet)
25 | - [A step-by-step demo for Atari SpaceInvaders ](https://github.com/yandexdataschool/AgentNet/blob/master/examples/Playing%20Atari%20with%20Deep%20Reinforcement%20Learning%20%28OpenAI%20Gym%29.ipynb)
26 |
27 | ## rllab
28 |
29 | A framework for developing and evaluating reinforcement learning algorithms, fully compatible with OpenAI Gym. It includes a wide range of continuous control tasks plus implementations of many algorithms. [/rllab/rllab](https://github.com/rllab/rllab)
30 |
31 | ## [keras-rl](https://github.com/matthiasplappert/keras-rl)
32 |
33 | [keras-rl](https://github.com/matthiasplappert/keras-rl) implements some state-of-the-art deep reinforcement learning algorithms. It was built with OpenAI Gym in mind, is built on top of the deep learning library [Keras](https://keras.io/), and utilises similar design patterns such as callbacks and user-definable metrics.
34 |
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/manipulate_egg.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/reacher.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/manipulate_block.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/examples/agents/keyboard_agent.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 |
4 | import sys, gym, time
5 |
6 | #
7 | # Test yourself as a learning agent! Pass environment name as a command-line argument, for example:
8 | #
9 | # python keyboard_agent.py SpaceInvadersNoFrameskip-v4
10 | #
11 |
12 | env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1])
13 |
14 | if not hasattr(env.action_space, 'n'):
15 | raise Exception('Keyboard agent only supports discrete action spaces')
16 | ACTIONS = env.action_space.n
17 | SKIP_CONTROL = 0 # Repeat the previous control decision SKIP_CONTROL times; this is how you
18 | # can test how much frame-skip is still playable.
19 |
20 | human_agent_action = 0
21 | human_wants_restart = False
22 | human_sets_pause = False
23 |
24 | def key_press(key, mod):
25 | global human_agent_action, human_wants_restart, human_sets_pause
26 | if key==0xff0d: human_wants_restart = True
27 | if key==32: human_sets_pause = not human_sets_pause
28 | a = int( key - ord('0') )
29 | if a <= 0 or a >= ACTIONS: return
30 | human_agent_action = a
31 |
32 | def key_release(key, mod):
33 | global human_agent_action
34 | a = int( key - ord('0') )
35 | if a <= 0 or a >= ACTIONS: return
36 | if human_agent_action == a:
37 | human_agent_action = 0
38 |
39 | env.render()
40 | env.unwrapped.viewer.window.on_key_press = key_press
41 | env.unwrapped.viewer.window.on_key_release = key_release
42 |
43 | def rollout(env):
44 | global human_agent_action, human_wants_restart, human_sets_pause
45 | human_wants_restart = False
46 | obser = env.reset()
47 | skip = 0
48 | total_reward = 0
49 | total_timesteps = 0
50 | while 1:
51 | if not skip:
52 | #print("taking action {}".format(human_agent_action))
53 | a = human_agent_action
54 | total_timesteps += 1
55 | skip = SKIP_CONTROL
56 | else:
57 | skip -= 1
58 |
59 | obser, r, done, info = env.step(a)
60 | if r != 0:
61 | print("reward %0.3f" % r)
62 | total_reward += r
63 | window_still_open = env.render()
64 | if window_still_open==False: return False
65 | if done: break
66 | if human_wants_restart: break
67 | while human_sets_pause:
68 | env.render()
69 | time.sleep(0.1)
70 | time.sleep(0.1)
71 | print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))
72 |
73 | print("ACTIONS={}".format(ACTIONS))
74 | print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
75 | print("No keys pressed is taking action 0")
76 |
77 | while 1:
78 | window_still_open = rollout(env)
79 | if window_still_open==False: break
80 |
81 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/striker.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | utils.EzPickle.__init__(self)
8 | self._striked = False
9 | self._min_strike_dist = np.inf
10 | self.strike_threshold = 0.1
11 | mujoco_env.MujocoEnv.__init__(self, 'striker.xml', 5)
12 |
13 | def step(self, a):
14 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
15 | vec_2 = self.get_body_com("object") - self.get_body_com("goal")
16 | self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(vec_2))
17 |
18 | if np.linalg.norm(vec_1) < self.strike_threshold:
19 | self._striked = True
20 | self._strike_pos = self.get_body_com("tips_arm")
21 |
22 | if self._striked:
23 | vec_3 = self.get_body_com("object") - self._strike_pos
24 | reward_near = - np.linalg.norm(vec_3)
25 | else:
26 | reward_near = - np.linalg.norm(vec_1)
27 |
28 | reward_dist = - np.linalg.norm(self._min_strike_dist)
29 | reward_ctrl = - np.square(a).sum()
30 | reward = 3 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
31 |
32 | self.do_simulation(a, self.frame_skip)
33 | ob = self._get_obs()
34 | done = False
35 | return ob, reward, done, dict(reward_dist=reward_dist,
36 | reward_ctrl=reward_ctrl)
37 |
38 | def viewer_setup(self):
39 | self.viewer.cam.trackbodyid = 0
40 | self.viewer.cam.distance = 4.0
41 |
42 | def reset_model(self):
43 | self._min_strike_dist = np.inf
44 | self._striked = False
45 | self._strike_pos = None
46 |
47 | qpos = self.init_qpos
48 |
49 | self.ball = np.array([0.5, -0.175])
50 | while True:
51 | self.goal = np.concatenate([
52 | self.np_random.uniform(low=0.15, high=0.7, size=1),
53 | self.np_random.uniform(low=0.1, high=1.0, size=1)])
54 | if np.linalg.norm(self.ball - self.goal) > 0.17:
55 | break
56 |
57 | qpos[-9:-7] = [self.ball[1], self.ball[0]]
58 | qpos[-7:-5] = self.goal
59 | diff = self.ball - self.goal
60 | angle = -np.arctan(diff[0] / (diff[1] + 1e-8))
61 | qpos[-1] = angle / 3.14
62 | qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1,
63 | size=self.model.nv)
64 | qvel[7:] = 0
65 | self.set_state(qpos, qvel)
66 | return self._get_obs()
67 |
68 | def _get_obs(self):
69 | return np.concatenate([
70 | self.sim.data.qpos.flat[:7],
71 | self.sim.data.qvel.flat[:7],
72 | self.get_body_com("tips_arm"),
73 | self.get_body_com("object"),
74 | self.get_body_com("goal"),
75 | ])
76 |
--------------------------------------------------------------------------------
/examples/scripts/benchmark_runner:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Run all the tasks on a benchmark using a random agent.
4 | #
5 | # This script assumes you have set an OPENAI_GYM_API_KEY environment
6 | # variable. You can find your API key in the web interface:
7 | # https://gym.openai.com/settings/profile.
8 | #
9 | import argparse
10 | import logging
11 | import os
12 | import sys
13 |
14 | import gym
15 | # In modules, use `logger = logging.getLogger(__name__)`
16 | from gym import wrappers
17 | from gym.scoreboard.scoring import benchmark_score_from_local
18 |
19 | import openai_benchmark
20 |
21 | logger = logging.getLogger()
22 |
23 | def main():
24 | parser = argparse.ArgumentParser(description=None)
25 | parser.add_argument('-b', '--benchmark-id', help='id of benchmark to run e.g. Atari7Ram-v0')
26 | parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
27 | parser.add_argument('-f', '--force', action='store_true', dest='force', default=False)
28 | parser.add_argument('-t', '--training-dir', default="/tmp/gym-results", help='What directory to upload.')
29 | args = parser.parse_args()
30 |
31 | if args.verbosity == 0:
32 | logger.setLevel(logging.INFO)
33 | elif args.verbosity >= 1:
34 | logger.setLevel(logging.DEBUG)
35 |
36 | benchmark_id = args.benchmark_id
37 | if benchmark_id is None:
38 | logger.info("Must supply a valid benchmark")
39 | return 1
40 |
41 | try:
42 | benchmark = gym.benchmark_spec(benchmark_id)
43 | except Exception:
44 | logger.info("Invalid benchmark")
45 | return 1
46 |
47 | # run benchmark tasks
48 | for task in benchmark.tasks:
49 | logger.info("Running on env: {}".format(task.env_id))
50 | for trial in range(task.trials):
51 | env = gym.make(task.env_id)
52 | training_dir_name = "{}/{}-{}".format(args.training_dir, task.env_id, trial)
53 | env = wrappers.Monitor(env, training_dir_name, video_callable=False, force=args.force)
54 | env.reset()
55 | for _ in range(task.max_timesteps):
56 | o, r, done, _ = env.step(env.action_space.sample())
57 | if done:
58 | env.reset()
59 | env.close()
60 |
61 | logger.info("""Computing statistics for this benchmark run...
62 | {{
63 | score: {score},
64 | num_envs_solved: {num_envs_solved},
65 | summed_training_seconds: {summed_training_seconds},
66 | start_to_finish_seconds: {start_to_finish_seconds},
67 | }}
68 |
69 | """.rstrip().format(**benchmark_score_from_local(benchmark_id, args.training_dir)))
70 |
71 | logger.info("""Done running, upload results using the following command:
72 |
73 | python -c "import gym; gym.upload('{}', benchmark_id='{}', algorithm_id='(unknown)')"
74 |
75 | """.rstrip().format(args.training_dir, benchmark_id))
76 |
77 | return 0
78 |
79 | if __name__ == '__main__':
80 | sys.exit(main())
81 |
--------------------------------------------------------------------------------
/gym/envs/robotics/README.md:
--------------------------------------------------------------------------------
1 | # Robotics environments
2 |
3 | Details and documentation on these robotics environments are available in our [blog post](https://blog.openai.com/ingredients-for-robotics-research/), the accompanying [technical report](https://arxiv.org/abs/1802.09464), and the [Gym website](https://gym.openai.com/envs/#robotics).
4 |
5 | If you use these environments, please cite the following paper:
6 |
7 | ```
8 | @misc{1802.09464,
9 | Author = {Matthias Plappert and Marcin Andrychowicz and Alex Ray and Bob McGrew and Bowen Baker and Glenn Powell and Jonas Schneider and Josh Tobin and Maciek Chociej and Peter Welinder and Vikash Kumar and Wojciech Zaremba},
10 | Title = {Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research},
11 | Year = {2018},
12 | Eprint = {arXiv:1802.09464},
13 | }
14 | ```
15 |
16 | ## Fetch environments
17 |
18 |
19 | [FetchReach-v0](https://gym.openai.com/envs/FetchReach-v0/): Fetch has to move its end-effector to the desired goal position.
20 |
21 |
22 |
23 |
24 | [FetchSlide-v0](https://gym.openai.com/envs/FetchSlide-v0/): Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
25 |
26 |
27 |
28 |
29 | [FetchPush-v0](https://gym.openai.com/envs/FetchPush-v0/): Fetch has to move a box by pushing it until it reaches a desired goal position.
30 |
31 |
32 |
33 |
34 | [FetchPickAndPlace-v0](https://gym.openai.com/envs/FetchPickAndPlace-v0/): Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
35 |
36 | ## Shadow Dexterous Hand environments
37 |
38 |
39 | [HandReach-v0](https://gym.openai.com/envs/HandReach-v0/): ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
40 |
41 |
42 |
43 |
44 | [HandManipulateBlock-v0](https://gym.openai.com/envs/HandManipulateBlock-v0/): ShadowHand has to manipulate a block until it achieves a desired goal position and rotation.
45 |
46 |
47 |
48 |
49 | [HandManipulateEgg-v0](https://gym.openai.com/envs/HandManipulateEgg-v0/): ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation.
50 |
51 |
52 |
53 |
54 | [HandManipulatePen-v0](https://gym.openai.com/envs/HandManipulatePen-v0/): ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation.
55 |
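All of the environments above are goal-based: observations are dictionaries with `observation`, `achieved_goal` and `desired_goal` entries, and the sparse reward can be recomputed for arbitrary goals. A minimal usage sketch (assuming `mujoco_py` and the MuJoCo binaries are installed):

```python
import gym

env = gym.make('FetchReach-v0')
obs = env.reset()

# Goal-based observations are dictionaries.
print(obs['observation'].shape, obs['achieved_goal'], obs['desired_goal'])

obs, reward, done, info = env.step(env.action_space.sample())

# The reward can be recomputed for arbitrary goal pairs, e.g. for HER-style relabelling.
recomputed = env.unwrapped.compute_reward(obs['achieved_goal'], obs['desired_goal'], info)
```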
--------------------------------------------------------------------------------
/gym/envs/tests/test_determinism.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | from gym import spaces
4 | from gym.envs.tests.spec_list import spec_list
5 |
6 | @pytest.mark.parametrize("spec", spec_list)
7 | def test_env(spec):
8 |
9 | # Note that this precludes running this test in multiple
10 | # threads. However, we probably already can't do multithreading
11 | # due to some environments.
12 | spaces.seed(0)
13 |
14 | env1 = spec.make()
15 | env1.seed(0)
16 | action_samples1 = [env1.action_space.sample() for i in range(4)]
17 | initial_observation1 = env1.reset()
18 | step_responses1 = [env1.step(action) for action in action_samples1]
19 | env1.close()
20 |
21 | spaces.seed(0)
22 |
23 | env2 = spec.make()
24 | env2.seed(0)
25 | action_samples2 = [env2.action_space.sample() for i in range(4)]
26 | initial_observation2 = env2.reset()
27 | step_responses2 = [env2.step(action) for action in action_samples2]
28 | env2.close()
29 |
30 | for i, (action_sample1, action_sample2) in enumerate(zip(action_samples1, action_samples2)):
31 | try:
32 | assert_equals(action_sample1, action_sample2)
33 | except AssertionError:
34 | print('env1.action_space=', env1.action_space)
35 | print('env2.action_space=', env2.action_space)
36 | print('action_samples1=', action_samples1)
37 | print('action_samples2=', action_samples2)
38 | print('[{}] action_sample1: {}, action_sample2: {}'.format(i, action_sample1, action_sample2))
39 | raise
40 |
41 | # Don't check rollout equality if it's a nondeterministic
42 | # environment.
43 | if spec.nondeterministic:
44 | return
45 |
46 | assert_equals(initial_observation1, initial_observation2)
47 |
48 | for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(zip(step_responses1, step_responses2)):
49 | assert_equals(o1, o2, '[{}] '.format(i))
50 | assert r1 == r2, '[{}] r1: {}, r2: {}'.format(i, r1, r2)
51 | assert d1 == d2, '[{}] d1: {}, d2: {}'.format(i, d1, d2)
52 |
53 | # Go returns a Pachi game board in info, which doesn't
54 | # properly check equality. For now, we hack around this by
55 | # just skipping Go.
56 | if spec.id not in ['Go9x9-v0', 'Go19x19-v0']:
57 | assert_equals(i1, i2, '[{}] '.format(i))
58 |
59 | def assert_equals(a, b, prefix=None):
60 | assert type(a) == type(b), "{}Differing types: {} and {}".format(prefix, a, b)
61 | if isinstance(a, dict):
62 | assert list(a.keys()) == list(b.keys()), "{}Key sets differ: {} and {}".format(prefix, a, b)
63 |
64 | for k in a.keys():
65 | v_a = a[k]
66 | v_b = b[k]
67 | assert_equals(v_a, v_b)
68 | elif isinstance(a, np.ndarray):
69 | np.testing.assert_array_equal(a, b)
70 | elif isinstance(a, tuple):
71 | for elem_from_a, elem_from_b in zip(a, b):
72 | assert_equals(elem_from_a, elem_from_b)
73 | else:
74 | assert a == b
75 |
--------------------------------------------------------------------------------
/gym/spaces/tests/test_spaces.py:
--------------------------------------------------------------------------------
1 | import json # note: ujson fails this test due to float equality
2 | from copy import copy
3 |
4 | import numpy as np
5 | import pytest
6 |
7 | from gym.spaces import Tuple, Box, Discrete, MultiDiscrete, MultiBinary, Dict
8 |
9 |
10 | @pytest.mark.parametrize("space", [
11 | Discrete(3),
12 | Tuple([Discrete(5), Discrete(10)]),
13 | Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
14 | Tuple((Discrete(5), Discrete(2), Discrete(2))),
15 | MultiDiscrete([2, 2, 100]),
16 | Dict({"position": Discrete(5),
17 | "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
18 | ])
19 | def test_roundtripping(space):
20 | sample_1 = space.sample()
21 | sample_2 = space.sample()
22 | assert space.contains(sample_1)
23 | assert space.contains(sample_2)
24 | json_rep = space.to_jsonable([sample_1, sample_2])
25 |
26 | json_roundtripped = json.loads(json.dumps(json_rep))
27 |
28 | samples_after_roundtrip = space.from_jsonable(json_roundtripped)
29 | sample_1_prime, sample_2_prime = samples_after_roundtrip
30 |
31 | s1 = space.to_jsonable([sample_1])
32 | s1p = space.to_jsonable([sample_1_prime])
33 | s2 = space.to_jsonable([sample_2])
34 | s2p = space.to_jsonable([sample_2_prime])
35 | assert s1 == s1p, "Expected {} to equal {}".format(s1, s1p)
36 | assert s2 == s2p, "Expected {} to equal {}".format(s2, s2p)
37 |
38 |
39 | @pytest.mark.parametrize("space", [
40 | Discrete(3),
41 | Box(low=np.array([-10, 0]),high=np.array([10, 10])),
42 | Tuple([Discrete(5), Discrete(10)]),
43 | Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
44 | Tuple((Discrete(5), Discrete(2), Discrete(2))),
45 | MultiDiscrete([2, 2, 100]),
46 | MultiBinary(6),
47 | Dict({"position": Discrete(5),
48 | "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
49 | ])
50 | def test_equality(space):
51 | space1 = space
52 | space2 = copy(space)
53 | assert space1 == space2, "Expected {} to equal {}".format(space1, space2)
54 |
55 |
56 | @pytest.mark.parametrize("spaces", [
57 | (Discrete(3), Discrete(4)),
58 | (MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])),
59 | (MultiBinary(8), MultiBinary(7)),
60 | (Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
61 | Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32)),
62 | (Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])),
63 | (Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})),
64 | (Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})),
65 | ])
66 | def test_inequality(spaces):
67 | space1, space2 = spaces
68 | assert space1 != space2, "Expected {} != {}".format(space1, space2)
69 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/hopper.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/README.md:
--------------------------------------------------------------------------------
1 | # Envs
2 |
3 | These are the core integrated environments. Note that we may later
4 | restructure any of the files, but will keep the environments available
5 | at the relevant package's top-level. So for example, you should access
6 | `AntEnv` as follows:
7 |
8 | ```
9 | # Will be supported in future releases
10 | from gym.envs import mujoco
11 | mujoco.AntEnv
12 | ```
13 |
14 | Rather than:
15 |
16 | ```
17 | # May break in future releases
18 | from gym.envs.mujoco import ant
19 | ant.AntEnv
20 | ```
21 |
22 | ## How to create new environments for Gym
23 |
24 | * Create a new repo called gym-foo, which should also be a PIP package.
25 |
26 | * A good example is https://github.com/openai/gym-soccer.
27 |
28 | * It should have at least the following files:
29 | ```sh
30 | gym-foo/
31 | README.md
32 | setup.py
33 | gym_foo/
34 | __init__.py
35 | envs/
36 | __init__.py
37 | foo_env.py
38 | foo_extrahard_env.py
39 | ```
40 |
41 | * `gym-foo/setup.py` should have:
42 |
43 | ```python
44 | from setuptools import setup
45 |
46 | setup(name='gym_foo',
47 | version='0.0.1',
48 | install_requires=['gym'] # And any other dependencies foo needs
49 | )
50 | ```
51 |
52 | * `gym-foo/gym_foo/__init__.py` should have:
53 | ```python
54 | from gym.envs.registration import register
55 |
56 | register(
57 | id='foo-v0',
58 | entry_point='gym_foo.envs:FooEnv',
59 | )
60 | register(
61 | id='foo-extrahard-v0',
62 | entry_point='gym_foo.envs:FooExtraHardEnv',
63 | )
64 | ```
65 |
66 | * `gym-foo/gym_foo/envs/__init__.py` should have:
67 | ```python
68 | from gym_foo.envs.foo_env import FooEnv
69 | from gym_foo.envs.foo_extrahard_env import FooExtraHardEnv
70 | ```
71 |
72 | * `gym-foo/gym_foo/envs/foo_env.py` should look something like:
73 | ```python
74 | import gym
75 | from gym import error, spaces, utils
76 | from gym.utils import seeding
77 |
78 | class FooEnv(gym.Env):
79 | metadata = {'render.modes': ['human']}
80 |
81 | def __init__(self):
82 | ...
83 | def step(self, action):
84 | ...
85 | def reset(self):
86 | ...
87 | def render(self, mode='human', close=False):
88 | ...
89 | ```
90 |
91 | ## How to add new environments to Gym, within this repo (not recommended for new environments)
92 |
93 | 1. Write your environment in an existing collection or a new collection. All collections are subfolders of `/gym/envs`.
94 | 2. Import your environment into the `__init__.py` file of the collection. This file will be located at `/gym/envs/my_collection/__init__.py`. Add `from gym.envs.my_collection.my_awesome_env import MyEnv` to this file.
95 | 3. Register your env in `/gym/envs/__init__.py`:
96 |
97 | ```
98 | register(
99 | id='MyEnv-v0',
100 | entry_point='gym.envs.my_collection:MyEnv',
101 | )
102 | ```
103 |
104 | 4. Add your environment to the scoreboard in `/gym/scoreboard/__init__.py`:
105 |
106 | ```
107 | add_task(
108 | id='MyEnv-v0',
109 | summary="Super cool environment",
110 | group='my_collection',
111 | contributor='mygithubhandle',
112 | )
113 | ```
114 |
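Once both registrations are in place, the new environment is constructed like any built-in one. A minimal sketch, using the illustrative `MyEnv-v0` id from above:

```python
import gym

env = gym.make('MyEnv-v0')
observation = env.reset()
observation, reward, done, info = env.step(env.action_space.sample())
env.close()
```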
--------------------------------------------------------------------------------
/gym/spaces/dict_space.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from collections import OrderedDict
3 |
4 | class Dict(gym.Space):
5 | """
6 | A dictionary of simpler spaces.
7 |
8 | Example usage:
9 | self.observation_space = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
10 |
11 | Example usage [nested]:
12 | self.nested_observation_space = spaces.Dict({
13 | 'sensors': spaces.Dict({
14 | 'position': spaces.Box(low=-100, high=100, shape=(3,)),
15 | 'velocity': spaces.Box(low=-1, high=1, shape=(3,)),
16 | 'front_cam': spaces.Tuple((
17 | spaces.Box(low=0, high=1, shape=(10, 10, 3)),
18 | spaces.Box(low=0, high=1, shape=(10, 10, 3))
19 | )),
20 | 'rear_cam': spaces.Box(low=0, high=1, shape=(10, 10, 3)),
21 | }),
22 | 'ext_controller': spaces.MultiDiscrete([ [0,4], [0,1], [0,1] ]),
23 | 'inner_state':spaces.Dict({
24 | 'charge': spaces.Discrete(100),
25 | 'system_checks': spaces.MultiBinary(10),
26 | 'job_status': spaces.Dict({
27 | 'task': spaces.Discrete(5),
28 | 'progress': spaces.Box(low=0, high=100, shape=()),
29 | })
30 | })
31 | })
32 | """
33 | def __init__(self, spaces=None, **spaces_kwargs):
34 | assert (spaces is None) or (not spaces_kwargs), 'Use either Dict(spaces=dict(...)) or Dict(foo=x, bar=z)'
35 | if spaces is None:
36 | spaces = spaces_kwargs
37 | if isinstance(spaces, dict) and not isinstance(spaces, OrderedDict):
38 | spaces = OrderedDict(sorted(list(spaces.items())))
39 | if isinstance(spaces, list):
40 | spaces = OrderedDict(spaces)
41 | self.spaces = spaces
42 | gym.Space.__init__(self, None, None) # None for shape and dtype, since it'll require special handling
43 |
44 | def sample(self):
45 | return OrderedDict([(k, space.sample()) for k, space in self.spaces.items()])
46 |
47 | def contains(self, x):
48 | if not isinstance(x, dict) or len(x) != len(self.spaces):
49 | return False
50 | for k, space in self.spaces.items():
51 | if k not in x:
52 | return False
53 | if not space.contains(x[k]):
54 | return False
55 | return True
56 |
57 | def __repr__(self):
58 | return "Dict(" + ", ". join([k + ":" + str(s) for k, s in self.spaces.items()]) + ")"
59 |
60 | def to_jsonable(self, sample_n):
61 | # serialize as dict-repr of vectors
62 | return {key: space.to_jsonable([sample[key] for sample in sample_n]) \
63 | for key, space in self.spaces.items()}
64 |
65 | def from_jsonable(self, sample_n):
66 | dict_of_list = {}
67 | for key, space in self.spaces.items():
68 | dict_of_list[key] = space.from_jsonable(sample_n[key])
69 | ret = []
70 | for i, _ in enumerate(dict_of_list[key]):
71 | entry = {}
72 | for key, value in dict_of_list.items():
73 | entry[key] = value[i]
74 | ret.append(entry)
75 | return ret
76 |
77 | def __eq__(self, other):
78 | return self.spaces == other.spaces
79 |
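A small sketch of how a `Dict` space behaves in practice, mirroring the simple docstring example above:

```python
from gym import spaces

space = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})

sample = space.sample()          # an OrderedDict keyed by the subspace names
assert space.contains(sample)

# JSON (de)serialization is delegated to the per-key subspaces.
jsonable = space.to_jsonable([sample])
restored = space.from_jsonable(jsonable)
assert space.contains(restored[0])
```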
--------------------------------------------------------------------------------
/gym/envs/classic_control/pendulum.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import spaces
3 | from gym.utils import seeding
4 | import numpy as np
5 | from os import path
6 |
7 | class PendulumEnv(gym.Env):
8 | metadata = {
9 | 'render.modes' : ['human', 'rgb_array'],
10 | 'video.frames_per_second' : 30
11 | }
12 |
13 | def __init__(self):
14 | self.max_speed=8
15 | self.max_torque=2.
16 | self.dt=.05
17 | self.viewer = None
18 |
19 | high = np.array([1., 1., self.max_speed])
20 | self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32)
21 | self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)
22 |
23 | self.seed()
24 |
25 | def seed(self, seed=None):
26 | self.np_random, seed = seeding.np_random(seed)
27 | return [seed]
28 |
29 | def step(self,u):
30 | th, thdot = self.state # th := theta
31 |
32 | g = 10.
33 | m = 1.
34 | l = 1.
35 | dt = self.dt
36 |
37 | u = np.clip(u, -self.max_torque, self.max_torque)[0]
38 | self.last_u = u # for rendering
39 | costs = angle_normalize(th)**2 + .1*thdot**2 + .001*(u**2)
40 |
41 | newthdot = thdot + (-3*g/(2*l) * np.sin(th + np.pi) + 3./(m*l**2)*u) * dt
42 | newth = th + newthdot*dt
43 | newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) #pylint: disable=E1111
44 |
45 | self.state = np.array([newth, newthdot])
46 | return self._get_obs(), -costs, False, {}
47 |
48 | def reset(self):
49 | high = np.array([np.pi, 1])
50 | self.state = self.np_random.uniform(low=-high, high=high)
51 | self.last_u = None
52 | return self._get_obs()
53 |
54 | def _get_obs(self):
55 | theta, thetadot = self.state
56 | return np.array([np.cos(theta), np.sin(theta), thetadot])
57 |
58 | def render(self, mode='human'):
59 |
60 | if self.viewer is None:
61 | from gym.envs.classic_control import rendering
62 | self.viewer = rendering.Viewer(500,500)
63 | self.viewer.set_bounds(-2.2,2.2,-2.2,2.2)
64 | rod = rendering.make_capsule(1, .2)
65 | rod.set_color(.8, .3, .3)
66 | self.pole_transform = rendering.Transform()
67 | rod.add_attr(self.pole_transform)
68 | self.viewer.add_geom(rod)
69 | axle = rendering.make_circle(.05)
70 | axle.set_color(0,0,0)
71 | self.viewer.add_geom(axle)
72 | fname = path.join(path.dirname(__file__), "assets/clockwise.png")
73 | self.img = rendering.Image(fname, 1., 1.)
74 | self.imgtrans = rendering.Transform()
75 | self.img.add_attr(self.imgtrans)
76 |
77 | self.viewer.add_onetime(self.img)
78 | self.pole_transform.set_rotation(self.state[0] + np.pi/2)
79 | if self.last_u:
80 | self.imgtrans.scale = (-self.last_u/2, np.abs(self.last_u)/2)
81 |
82 | return self.viewer.render(return_rgb_array = mode=='rgb_array')
83 |
84 | def close(self):
85 | if self.viewer:
86 | self.viewer.close()
87 | self.viewer = None
88 |
89 | def angle_normalize(x):
90 | return (((x+np.pi) % (2*np.pi)) - np.pi)
91 |
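For reference, the update in `step` above implements the following discretised dynamics (with g = 10, m = 1, l = 1, dt = 0.05 and the torque u clipped to [-2, 2], as set in the code):

$$
\dot\theta' = \dot\theta_t + \Big(-\tfrac{3g}{2l}\sin(\theta_t + \pi) + \tfrac{3}{m l^2}\,u\Big)\,dt,
\qquad
\theta_{t+1} = \theta_t + \dot\theta'\,dt,
\qquad
\dot\theta_{t+1} = \operatorname{clip}(\dot\theta', -8, 8)
$$

with reward $-\big(\mathrm{angle\_normalize}(\theta_t)^2 + 0.1\,\dot\theta_t^2 + 0.001\,u^2\big)$.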
--------------------------------------------------------------------------------
/gym/envs/toy_text/guessing_game.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import gym
4 | from gym import spaces
5 | from gym.utils import seeding
6 |
7 |
8 | class GuessingGame(gym.Env):
9 | """Number guessing game
10 |
11 | The object of the game is to guess within 1% of the randomly chosen number
12 | within 200 time steps
13 |
14 | After each step the agent is provided with one of four possible observations
15 | which indicate where the guess is in relation to the randomly chosen number
16 |
17 | 0 - No guess yet submitted (only after reset)
18 | 1 - Guess is lower than the target
19 | 2 - Guess is equal to the target
20 | 3 - Guess is higher than the target
21 |
22 | The rewards are:
23 | 0 if the agent's guess is outside of 1% of the target
24 | 1 if the agent's guess is inside 1% of the target
25 |
26 | The episode terminates after the agent guesses within 1% of the target or
27 | 200 steps have been taken
28 |
29 | The agent will need to use a memory of previously submitted actions and observations
30 | in order to efficiently explore the available actions
31 |
32 | The purpose is to have agents optimise their exploration parameters (e.g. how far to
33 | explore from previous actions) based on previous experience. Because the goal changes
34 | each episode a state-value or action-value function isn't able to provide any additional
35 | benefit apart from being able to tell whether to increase or decrease the next guess.
36 |
37 | The perfect agent would likely learn the bounds of the action space (without referring
38 | to them explicitly) and then follow binary-tree-style exploration towards the goal number
39 | """
40 | def __init__(self):
41 | self.range = 1000 # Randomly selected number is within +/- this value
42 | self.bounds = 10000
43 |
44 | self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
45 | dtype=np.float32)
46 | self.observation_space = spaces.Discrete(4)
47 |
48 | self.number = 0
49 | self.guess_count = 0
50 | self.guess_max = 200
51 | self.observation = 0
52 |
53 | self.seed()
54 | self.reset()
55 |
56 | def seed(self, seed=None):
57 | self.np_random, seed = seeding.np_random(seed)
58 | return [seed]
59 |
60 | def step(self, action):
61 | assert self.action_space.contains(action)
62 |
63 | if action < self.number:
64 | self.observation = 1
65 |
66 | elif action == self.number:
67 | self.observation = 2
68 |
69 | elif action > self.number:
70 | self.observation = 3
71 |
72 | reward = 0
73 | done = False
74 |
75 | if (self.number - self.range * 0.01) < action < (self.number + self.range * 0.01):
76 | reward = 1
77 | done = True
78 |
79 | self.guess_count += 1
80 | if self.guess_count >= self.guess_max:
81 | done = True
82 |
83 | return self.observation, reward, done, {"number": self.number, "guesses": self.guess_count}
84 |
85 | def reset(self):
86 | self.number = self.np_random.uniform(-self.range, self.range)
87 | self.guess_count = 0
88 | self.observation = 0
89 | return self.observation
90 |
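A minimal sketch of the bisection-style strategy the docstring alludes to (purely illustrative; it narrows the interval using the lower/higher observations):

```python
import numpy as np
from gym.envs.toy_text.guessing_game import GuessingGame

env = GuessingGame()
obs = env.reset()
low, high = -env.bounds, env.bounds
done = False
while not done:
    guess = np.array([(low + high) / 2.0])
    obs, reward, done, info = env.step(guess)
    if obs == 1:       # guess was lower than the target
        low = guess[0]
    elif obs == 3:     # guess was higher than the target
        high = guess[0]
print(info)            # {'number': ..., 'guesses': ...}; typically converges in about a dozen guesses
```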
--------------------------------------------------------------------------------
/gym/envs/tests/test_envs_semantics.py:
--------------------------------------------------------------------------------
1 | """
2 | Currently disabled, since this was done in a very poor way:
3 | it hashes the str representation of objects.
4 | """
5 |
6 |
7 | from __future__ import unicode_literals
8 | import json
9 | import hashlib
10 | import os
11 |
12 | import pytest
13 | from gym import spaces, logger
14 | from gym.envs.tests.spec_list import spec_list
15 |
16 | DATA_DIR = os.path.dirname(__file__)
17 | ROLLOUT_STEPS = 100
18 | episodes = ROLLOUT_STEPS
19 | steps = ROLLOUT_STEPS
20 |
21 | ROLLOUT_FILE = os.path.join(DATA_DIR, 'rollout.json')
22 |
23 | if not os.path.isfile(ROLLOUT_FILE):
24 | with open(ROLLOUT_FILE, "w") as outfile:
25 | json.dump({}, outfile, indent=2)
26 |
27 | def hash_object(unhashed):
28 | return hashlib.sha256(str(unhashed).encode('utf-16')).hexdigest() # This is really bad, str could be same while values change
29 |
30 | def generate_rollout_hash(spec):
31 | spaces.seed(0)
32 | env = spec.make()
33 | env.seed(0)
34 |
35 | observation_list = []
36 | action_list = []
37 | reward_list = []
38 | done_list = []
39 |
40 | total_steps = 0
41 | for episode in range(episodes):
42 | if total_steps >= ROLLOUT_STEPS: break
43 | observation = env.reset()
44 |
45 | for step in range(steps):
46 | action = env.action_space.sample()
47 | observation, reward, done, _ = env.step(action)
48 |
49 | action_list.append(action)
50 | observation_list.append(observation)
51 | reward_list.append(reward)
52 | done_list.append(done)
53 |
54 | total_steps += 1
55 | if total_steps >= ROLLOUT_STEPS: break
56 |
57 | if done: break
58 |
59 | observations_hash = hash_object(observation_list)
60 | actions_hash = hash_object(action_list)
61 | rewards_hash = hash_object(reward_list)
62 | dones_hash = hash_object(done_list)
63 |
64 | env.close()
65 | return observations_hash, actions_hash, rewards_hash, dones_hash
66 |
67 | @pytest.mark.parametrize("spec", spec_list)
68 | def test_env_semantics(spec):
69 | logger.warn("Skipping this test. Existing hashes were generated in a bad way")
70 | return
71 | with open(ROLLOUT_FILE) as data_file:
72 | rollout_dict = json.load(data_file)
73 |
74 | if spec.id not in rollout_dict:
75 | if not spec.nondeterministic:
76 | logger.warn("Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs".format(spec.id))
77 | return
78 |
79 | logger.info("Testing rollout for {} environment...".format(spec.id))
80 |
81 | observations_now, actions_now, rewards_now, dones_now = generate_rollout_hash(spec)
82 |
83 | errors = []
84 | if rollout_dict[spec.id]['observations'] != observations_now:
85 | errors.append('Observations not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['observations'], observations_now))
86 | if rollout_dict[spec.id]['actions'] != actions_now:
87 | errors.append('Actions not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['actions'], actions_now))
88 | if rollout_dict[spec.id]['rewards'] != rewards_now:
89 | errors.append('Rewards not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['rewards'], rewards_now))
90 | if rollout_dict[spec.id]['dones'] != dones_now:
91 | errors.append('Dones not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['dones'], dones_now))
92 | if len(errors):
93 | for error in errors:
94 | logger.warn(error)
95 | raise ValueError(errors)
96 |
--------------------------------------------------------------------------------
/gym/utils/seeding.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import numpy as np
3 | import os
4 | import random as _random
5 | from six import integer_types
6 | import struct
7 | import sys
8 |
9 | from gym import error
10 |
11 | def np_random(seed=None):
12 | if seed is not None and not (isinstance(seed, integer_types) and 0 <= seed):
13 | raise error.Error('Seed must be a non-negative integer or omitted, not {}'.format(seed))
14 |
15 | seed = create_seed(seed)
16 |
17 | rng = np.random.RandomState()
18 | rng.seed(_int_list_from_bigint(hash_seed(seed)))
19 | return rng, seed
20 |
21 | def hash_seed(seed=None, max_bytes=8):
22 | """Any given evaluation is likely to have many PRNG's active at
23 | once. (Most commonly, because the environment is running in
24 | multiple processes.) There's literature indicating that having
25 | linear correlations between seeds of multiple PRNG's can correlate
26 | the outputs:
27 |
28 | http://blogs.unity3d.com/2015/01/07/a-primer-on-repeatable-random-numbers/
29 | http://stackoverflow.com/questions/1554958/how-different-do-random-seeds-need-to-be
30 | http://dl.acm.org/citation.cfm?id=1276928
31 |
32 | Thus, for sanity we hash the seeds before using them. (This scheme
33 | is likely not crypto-strength, but it should be good enough to get
34 | rid of simple correlations.)
35 |
36 | Args:
37 | seed (Optional[int]): None seeds from an operating system specific randomness source.
38 | max_bytes: Maximum number of bytes to use in the hashed seed.
39 | """
40 | if seed is None:
41 | seed = create_seed(max_bytes=max_bytes)
42 | hash = hashlib.sha512(str(seed).encode('utf8')).digest()
43 | return _bigint_from_bytes(hash[:max_bytes])
44 |
45 | def create_seed(a=None, max_bytes=8):
46 | """Create a strong random seed. Otherwise, Python 2 would seed using
47 | the system time, which might be non-robust especially in the
48 | presence of concurrency.
49 |
50 | Args:
51 | a (Optional[int, str]): None seeds from an operating system specific randomness source.
52 | max_bytes: Maximum number of bytes to use in the seed.
53 | """
54 | # Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
55 | if a is None:
56 | a = _bigint_from_bytes(os.urandom(max_bytes))
57 | elif isinstance(a, str):
58 | a = a.encode('utf8')
59 | a += hashlib.sha512(a).digest()
60 | a = _bigint_from_bytes(a[:max_bytes])
61 | elif isinstance(a, integer_types):
62 | a = a % 2**(8 * max_bytes)
63 | else:
64 | raise error.Error('Invalid type for seed: {} ({})'.format(type(a), a))
65 |
66 | return a
67 |
68 | # TODO: don't hardcode sizeof_int here
69 | def _bigint_from_bytes(bytes):
70 | sizeof_int = 4
71 | padding = sizeof_int - len(bytes) % sizeof_int
72 | bytes += b'\0' * padding
73 | int_count = int(len(bytes) / sizeof_int)
74 | unpacked = struct.unpack("{}I".format(int_count), bytes)
75 | accum = 0
76 | for i, val in enumerate(unpacked):
77 | accum += 2 ** (sizeof_int * 8 * i) * val
78 | return accum
79 |
80 | def _int_list_from_bigint(bigint):
81 | # Special case 0
82 | if bigint < 0:
83 | raise error.Error('Seed must be non-negative, not {}'.format(bigint))
84 | elif bigint == 0:
85 | return [0]
86 |
87 | ints = []
88 | while bigint > 0:
89 | bigint, mod = divmod(bigint, 2 ** 32)
90 | ints.append(mod)
91 | return ints
92 |
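A short sketch of these helpers in use, mirroring how the environments above call them from their `seed` methods:

```python
from gym.utils import seeding

# np_random hashes the seed before use (see hash_seed above), so nearby
# integer seeds do not yield correlated generators.
rng, used_seed = seeding.np_random(42)
print(used_seed)             # 42
print(rng.uniform(-1, 1))    # deterministic given the seed

# Passing None draws entropy from the operating system instead.
rng2, generated_seed = seeding.np_random(None)
```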
--------------------------------------------------------------------------------
/examples/agents/cem.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import gym
4 | from gym import wrappers, logger
5 | import numpy as np
6 | from six.moves import cPickle as pickle
7 | import json, sys, os
8 | from os import path
9 | from _policies import BinaryActionLinearPolicy # Different file so it can be unpickled
10 | import argparse
11 |
12 | def cem(f, th_mean, batch_size, n_iter, elite_frac, initial_std=1.0):
13 | """
14 | Generic implementation of the cross-entropy method for maximizing a black-box function
15 |
16 | f: a function mapping from vector -> scalar
17 | th_mean: initial mean over input distribution
18 | batch_size: number of samples of theta to evaluate per batch
19 | n_iter: number of batches
20 | elite_frac: each batch, select this fraction of the top-performing samples
21 | initial_std: initial standard deviation over parameter vectors
22 | """
23 | n_elite = int(np.round(batch_size*elite_frac))
24 | th_std = np.ones_like(th_mean) * initial_std
25 |
26 | for _ in range(n_iter):
27 | ths = np.array([th_mean + dth for dth in th_std[None,:]*np.random.randn(batch_size, th_mean.size)])
28 | ys = np.array([f(th) for th in ths])
29 | elite_inds = ys.argsort()[::-1][:n_elite]
30 | elite_ths = ths[elite_inds]
31 | th_mean = elite_ths.mean(axis=0)
32 | th_std = elite_ths.std(axis=0)
33 | yield {'ys' : ys, 'theta_mean' : th_mean, 'y_mean' : ys.mean()}
34 |
35 | def do_rollout(agent, env, num_steps, render=False):
36 | total_rew = 0
37 | ob = env.reset()
38 | for t in range(num_steps):
39 | a = agent.act(ob)
40 | (ob, reward, done, _info) = env.step(a)
41 | total_rew += reward
42 | if render and t%3==0: env.render()
43 | if done: break
44 | return total_rew, t+1
45 |
46 | if __name__ == '__main__':
47 | logger.set_level(logger.INFO)
48 |
49 | parser = argparse.ArgumentParser()
50 | parser.add_argument('--display', action='store_true')
51 | parser.add_argument('target', nargs="?", default="CartPole-v0")
52 | args = parser.parse_args()
53 |
54 | env = gym.make(args.target)
55 | env.seed(0)
56 | np.random.seed(0)
57 | params = dict(n_iter=10, batch_size=25, elite_frac = 0.2)
58 | num_steps = 200
59 |
60 | # You provide the directory to write to (can be an existing
61 | # directory, but can't contain previous monitor results). You can
62 | # also dump to a tempdir if you'd like: tempfile.mkdtemp().
63 | outdir = '/tmp/cem-agent-results'
64 | env = wrappers.Monitor(env, outdir, force=True)
65 |
66 | # Prepare snapshotting
67 | # ----------------------------------------
68 | def writefile(fname, s):
69 | with open(path.join(outdir, fname), 'w') as fh: fh.write(s)
70 | info = {}
71 | info['params'] = params
72 | info['argv'] = sys.argv
73 | info['env_id'] = env.spec.id
74 | # ------------------------------------------
75 |
76 | def noisy_evaluation(theta):
77 | agent = BinaryActionLinearPolicy(theta)
78 | rew, T = do_rollout(agent, env, num_steps)
79 | return rew
80 |
81 | # Train the agent, and snapshot each stage
82 | for (i, iterdata) in enumerate(
83 | cem(noisy_evaluation, np.zeros(env.observation_space.shape[0]+1), **params)):
84 | print('Iteration %2i. Episode mean reward: %7.3f'%(i, iterdata['y_mean']))
85 | agent = BinaryActionLinearPolicy(iterdata['theta_mean'])
86 | if args.display: do_rollout(agent, env, 200, render=True)
87 | writefile('agent-%.4i.pkl'%i, str(pickle.dumps(agent, -1)))
88 |
89 | # Write out the env at the end so we store the parameters of this
90 | # environment.
91 | writefile('info.json', json.dumps(info))
92 |
93 | env.close()
94 |
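To see the `cem` generator in isolation, here is a toy sketch that maximises a simple quadratic (no gym involved; it assumes the `cem` function above has been copied into scope):

```python
import numpy as np

def toy_objective(theta):
    # Maximised at theta == [1, 2, 3].
    return -np.sum((theta - np.array([1.0, 2.0, 3.0])) ** 2)

for i, stats in enumerate(cem(toy_objective, th_mean=np.zeros(3),
                              batch_size=50, n_iter=20, elite_frac=0.2)):
    print(i, stats['y_mean'], stats['theta_mean'])
```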
--------------------------------------------------------------------------------
/gym/error.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | class Error(Exception):
4 | pass
5 |
6 | # Local errors
7 |
8 | class Unregistered(Error):
9 | """Raised when the user requests an item from the registry that does
10 | not actually exist.
11 | """
12 | pass
13 |
14 | class UnregisteredEnv(Unregistered):
15 | """Raised when the user requests an env from the registry that does
16 | not actually exist.
17 | """
18 | pass
19 |
20 | class UnregisteredBenchmark(Unregistered):
21 | """Raised when the user requests an env from the registry that does
22 | not actually exist.
23 | """
24 | pass
25 |
26 | class DeprecatedEnv(Error):
27 | """Raised when the user requests an env from the registry with an
28 | older version number than the latest env with the same name.
29 | """
30 | pass
31 |
32 | class UnseedableEnv(Error):
33 | """Raised when the user tries to seed an env that does not support
34 | seeding.
35 | """
36 | pass
37 |
38 | class DependencyNotInstalled(Error):
39 | pass
40 |
41 | class UnsupportedMode(Exception):
42 | """Raised when the user requests a rendering mode not supported by the
43 | environment.
44 | """
45 | pass
46 |
47 | class ResetNeeded(Exception):
48 | """When the monitor is active, raised when the user tries to step an
49 | environment that's already done.
50 | """
51 | pass
52 |
53 | class ResetNotAllowed(Exception):
54 | """When the monitor is active, raised when the user tries to step an
55 | environment that's not yet done.
56 | """
57 | pass
58 |
59 | class InvalidAction(Exception):
60 | """Raised when the user performs an action not contained within the
61 | action space
62 | """
63 | pass
64 |
65 | # API errors
66 |
67 | class APIError(Error):
68 | def __init__(self, message=None, http_body=None, http_status=None,
69 | json_body=None, headers=None):
70 | super(APIError, self).__init__(message)
71 |
72 | if http_body and hasattr(http_body, 'decode'):
73 | try:
74 | http_body = http_body.decode('utf-8')
75 | except:
76 | http_body = ('<Could not decode body as utf-8. '
77 | 'Please report to gym@openai.com>')
78 |
79 | self._message = message
80 | self.http_body = http_body
81 | self.http_status = http_status
82 | self.json_body = json_body
83 | self.headers = headers or {}
84 | self.request_id = self.headers.get('request-id', None)
85 |
86 | def __unicode__(self):
87 | if self.request_id is not None:
88 | msg = self._message or ""
89 | return u"Request {0}: {1}".format(self.request_id, msg)
90 | else:
91 | return self._message
92 |
93 | def __str__(self):
94 | try: # Python 2
95 | return unicode(self).encode('utf-8')
96 | except NameError: # Python 3
97 | return self.__unicode__()
98 |
99 |
100 | class APIConnectionError(APIError):
101 | pass
102 |
103 |
104 | class InvalidRequestError(APIError):
105 |
106 | def __init__(self, message, param, http_body=None,
107 | http_status=None, json_body=None, headers=None):
108 | super(InvalidRequestError, self).__init__(
109 | message, http_body, http_status, json_body,
110 | headers)
111 | self.param = param
112 |
113 |
114 | class AuthenticationError(APIError):
115 | pass
116 |
117 | class RateLimitError(APIError):
118 | pass
119 |
120 | # Video errors
121 |
122 | class VideoRecorderError(Error):
123 | pass
124 |
125 | class InvalidFrame(Error):
126 | pass
127 |
128 | # Wrapper errors
129 |
130 | class DoubleWrapperError(Error):
131 | pass
132 |
133 |
134 | class WrapAfterConfigureError(Error):
135 | pass
136 |
137 |
138 | class RetriesExceededError(Error):
139 | pass
140 |
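A minimal usage sketch (not part of the file): gym's registry raises these error types when an environment lookup fails, so callers can catch them selectively. The env id below is illustrative and intentionally unregistered.

    import gym
    from gym import error

    try:
        env = gym.make('NoSuchEnv-v0')   # illustrative, unregistered id
    except error.UnregisteredEnv:
        print('no environment with this id is registered')
    except error.DeprecatedEnv:
        print('a newer version of this environment id exists')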
--------------------------------------------------------------------------------
/gym/envs/robotics/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym import error
4 | try:
5 | import mujoco_py
6 | except ImportError as e:
7 | raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
8 |
9 |
10 | def robot_get_obs(sim):
11 | """Returns all joint positions and velocities associated with
12 | a robot.
13 | """
14 | if sim.data.qpos is not None and sim.model.joint_names:
15 | names = [n for n in sim.model.joint_names if n.startswith('robot')]
16 | return (
17 | np.array([sim.data.get_joint_qpos(name) for name in names]),
18 | np.array([sim.data.get_joint_qvel(name) for name in names]),
19 | )
20 | return np.zeros(0), np.zeros(0)
21 |
22 |
23 | def ctrl_set_action(sim, action):
24 | """For torque actuators it copies the action into mujoco ctrl field.
25 | For position actuators it sets the target relative to the current qpos.
26 | """
27 | if sim.model.nmocap > 0:
28 | _, action = np.split(action, (sim.model.nmocap * 7, ))
29 | if sim.data.ctrl is not None:
30 | for i in range(action.shape[0]):
31 | if sim.model.actuator_biastype[i] == 0:
32 | sim.data.ctrl[i] = action[i]
33 | else:
34 | idx = sim.model.jnt_qposadr[sim.model.actuator_trnid[i, 0]]
35 | sim.data.ctrl[i] = sim.data.qpos[idx] + action[i]
36 |
37 |
38 | def mocap_set_action(sim, action):
39 | """The action controls the robot using mocaps. Specifically, bodies
40 | on the robot (for example the gripper wrist) are controlled with
41 | mocap bodies. In this case the action is the desired difference
42 | in position and orientation (quaternion), in world coordinates,
43 | of the target body. The mocap is positioned relative to
44 | the target body according to the delta, and the MuJoCo equality
45 | constraint optimizer tries to center the welded body on the mocap.
46 | """
47 | if sim.model.nmocap > 0:
48 | action, _ = np.split(action, (sim.model.nmocap * 7, ))
49 | action = action.reshape(sim.model.nmocap, 7)
50 |
51 | pos_delta = action[:, :3]
52 | quat_delta = action[:, 3:]
53 |
54 | reset_mocap2body_xpos(sim)
55 | sim.data.mocap_pos[:] = sim.data.mocap_pos + pos_delta
56 | sim.data.mocap_quat[:] = sim.data.mocap_quat + quat_delta
57 |
58 |
59 | def reset_mocap_welds(sim):
60 | """Resets the mocap welds that we use for actuation.
61 | """
62 | if sim.model.nmocap > 0 and sim.model.eq_data is not None:
63 | for i in range(sim.model.eq_data.shape[0]):
64 | if sim.model.eq_type[i] == mujoco_py.const.EQ_WELD:
65 | sim.model.eq_data[i, :] = np.array(
66 | [0., 0., 0., 1., 0., 0., 0.])
67 | sim.forward()
68 |
69 |
70 | def reset_mocap2body_xpos(sim):
71 | """Resets the position and orientation of the mocap bodies to the same
72 | values as the bodies they're welded to.
73 | """
74 |
75 | if (sim.model.eq_type is None or
76 | sim.model.eq_obj1id is None or
77 | sim.model.eq_obj2id is None):
78 | return
79 | for eq_type, obj1_id, obj2_id in zip(sim.model.eq_type,
80 | sim.model.eq_obj1id,
81 | sim.model.eq_obj2id):
82 | if eq_type != mujoco_py.const.EQ_WELD:
83 | continue
84 |
85 | mocap_id = sim.model.body_mocapid[obj1_id]
86 | if mocap_id != -1:
87 | # obj1 is the mocap, obj2 is the welded body
88 | body_idx = obj2_id
89 | else:
90 | # obj2 is the mocap, obj1 is the welded body
91 | mocap_id = sim.model.body_mocapid[obj2_id]
92 | body_idx = obj1_id
93 |
94 | assert (mocap_id != -1)
95 | sim.data.mocap_pos[mocap_id][:] = sim.data.body_xpos[body_idx]
96 | sim.data.mocap_quat[mocap_id][:] = sim.data.body_xquat[body_idx]
97 |
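A minimal sketch (not part of the file), assuming a working mujoco_py installation and a MuJoCo model whose robot joints are prefixed with 'robot'; the model path is hypothetical.

    import mujoco_py
    from gym.envs.robotics import utils

    model = mujoco_py.load_model_from_path('path/to/robot.xml')  # hypothetical model
    sim = mujoco_py.MjSim(model)
    sim.forward()

    utils.reset_mocap_welds(sim)            # zero the weld offsets before issuing actions
    qpos, qvel = utils.robot_get_obs(sim)   # positions/velocities of the 'robot*' joints
    print(qpos.shape, qvel.shape)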
--------------------------------------------------------------------------------
/gym/wrappers/monitoring/stats_recorder.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import time
4 |
5 | from gym import error
6 | from gym.utils import atomic_write
7 | from gym.utils.json_utils import json_encode_np
8 |
9 | class StatsRecorder(object):
10 | def __init__(self, directory, file_prefix, autoreset=False, env_id=None):
11 | self.autoreset = autoreset
12 | self.env_id = env_id
13 |
14 | self.initial_reset_timestamp = None
15 | self.directory = directory
16 | self.file_prefix = file_prefix
17 | self.episode_lengths = []
18 | self.episode_rewards = []
19 | self.episode_types = [] # experimental addition
20 | self._type = 't'
21 | self.timestamps = []
22 | self.steps = None
23 | self.total_steps = 0
24 | self.rewards = None
25 |
26 | self.done = None
27 | self.closed = False
28 |
29 | filename = '{}.stats.json'.format(self.file_prefix)
30 | self.path = os.path.join(self.directory, filename)
31 |
32 | @property
33 | def type(self):
34 | return self._type
35 |
36 | @type.setter
37 | def type(self, type):
38 | if type not in ['t', 'e']:
39 | raise error.Error('Invalid episode type {}: must be t for training or e for evaluation'.format(type))
40 | self._type = type
41 |
42 | def before_step(self, action):
43 | assert not self.closed
44 |
45 | if self.done:
46 | raise error.ResetNeeded("Trying to step environment which is currently done. While the monitor is active for {}, you cannot step beyond the end of an episode. Call 'env.reset()' to start the next episode.".format(self.env_id))
47 | elif self.steps is None:
48 | raise error.ResetNeeded("Trying to step an environment before reset. While the monitor is active for {}, you must call 'env.reset()' before taking an initial step.".format(self.env_id))
49 |
50 | def after_step(self, observation, reward, done, info):
51 | self.steps += 1
52 | self.total_steps += 1
53 | self.rewards += reward
54 | self.done = done
55 |
56 | if done:
57 | self.save_complete()
58 |
59 | if done:
60 | if self.autoreset:
61 | self.before_reset()
62 | self.after_reset(observation)
63 |
64 | def before_reset(self):
65 | assert not self.closed
66 |
67 | if self.done is not None and not self.done and self.steps > 0:
68 | raise error.Error("Tried to reset environment which is not done. While the monitor is active for {}, you cannot call reset() unless the episode is over.".format(self.env_id))
69 |
70 | self.done = False
71 | if self.initial_reset_timestamp is None:
72 | self.initial_reset_timestamp = time.time()
73 |
74 | def after_reset(self, observation):
75 | self.steps = 0
76 | self.rewards = 0
77 | # We write the type at the beginning of the episode. If a user
78 | # changes the type, it's more natural for it to apply next
79 | # time the user calls reset().
80 | self.episode_types.append(self._type)
81 |
82 | def save_complete(self):
83 | if self.steps is not None:
84 | self.episode_lengths.append(self.steps)
85 | self.episode_rewards.append(float(self.rewards))
86 | self.timestamps.append(time.time())
87 |
88 | def close(self):
89 | self.flush()
90 | self.closed = True
91 |
92 | def flush(self):
93 | if self.closed:
94 | return
95 |
96 | with atomic_write.atomic_write(self.path) as f:
97 | json.dump({
98 | 'initial_reset_timestamp': self.initial_reset_timestamp,
99 | 'timestamps': self.timestamps,
100 | 'episode_lengths': self.episode_lengths,
101 | 'episode_rewards': self.episode_rewards,
102 | 'episode_types': self.episode_types,
103 | }, f, default=json_encode_np)
104 |
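A minimal sketch (not part of the file): the Monitor wrapper normally drives this object, but the before/after lifecycle can be exercised directly. The directory, file prefix, and env id here are hypothetical.

    import tempfile
    from gym.wrappers.monitoring.stats_recorder import StatsRecorder

    recorder = StatsRecorder(tempfile.mkdtemp(), 'example', env_id='CartPole-v1')
    recorder.before_reset()
    recorder.after_reset(observation=None)

    recorder.before_step(action=0)
    recorder.after_step(observation=None, reward=1.0, done=True, info={})

    recorder.close()   # flushes example.stats.json to the temporary directory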
--------------------------------------------------------------------------------
/scripts/generate_json.py:
--------------------------------------------------------------------------------
1 | from __future__ import unicode_literals
2 | from gym import envs, spaces, logger
3 | import json
4 | import os
5 | import sys
6 | import argparse
7 |
8 | from gym.envs.tests.spec_list import should_skip_env_spec_for_tests
9 | from gym.envs.tests.test_envs_semantics import generate_rollout_hash, hash_object
10 |
11 | DATA_DIR = os.path.join(os.path.dirname(__file__), os.pardir, 'gym', 'envs', 'tests')
12 | ROLLOUT_STEPS = 100
13 | episodes = ROLLOUT_STEPS
14 | steps = ROLLOUT_STEPS
15 |
16 | ROLLOUT_FILE = os.path.join(DATA_DIR, 'rollout.json')
17 |
18 | if not os.path.isfile(ROLLOUT_FILE):
19 | logger.info("No rollout file found. Writing empty json file to {}".format(ROLLOUT_FILE))
20 | with open(ROLLOUT_FILE, "w") as outfile:
21 | json.dump({}, outfile, indent=2)
22 |
23 | def update_rollout_dict(spec, rollout_dict):
24 | """
25 | Takes as input the environment spec for which the rollout is to be generated,
26 | and the existing dictionary of rollouts. Returns True iff the dictionary was
27 | modified.
28 | """
29 | # Skip platform-dependent
30 | if should_skip_env_spec_for_tests(spec):
31 | logger.info("Skipping tests for {}".format(spec.id))
32 | return False
33 |
34 | # Skip environments that are nondeterministic
35 | if spec.nondeterministic:
36 | logger.info("Skipping tests for nondeterministic env {}".format(spec.id))
37 | return False
38 |
39 | logger.info("Generating rollout for {}".format(spec.id))
40 |
41 | try:
42 | observations_hash, actions_hash, rewards_hash, dones_hash = generate_rollout_hash(spec)
43 | except:
44 | # If running the env generates an exception, don't write to the rollout file
45 | logger.warn("Exception {} thrown while generating rollout for {}. Rollout not added.".format(sys.exc_info()[0], spec.id))
46 | return False
47 |
48 | rollout = {}
49 | rollout['observations'] = observations_hash
50 | rollout['actions'] = actions_hash
51 | rollout['rewards'] = rewards_hash
52 | rollout['dones'] = dones_hash
53 |
54 | existing = rollout_dict.get(spec.id)
55 | if existing:
56 | differs = False
57 | for key, new_hash in rollout.items():
58 | differs = differs or existing[key] != new_hash
59 | if not differs:
60 | logger.debug("Hashes match with existing for {}".format(spec.id))
61 | return False
62 | else:
63 | logger.warn("Got new hash for {}. Overwriting.".format(spec.id))
64 |
65 | rollout_dict[spec.id] = rollout
66 | return True
67 |
68 | def add_new_rollouts(spec_ids, overwrite):
69 | environments = [spec for spec in envs.registry.all() if spec._entry_point is not None]
70 | if spec_ids:
71 | environments = [spec for spec in environments if spec.id in spec_ids]
72 | assert len(environments) == len(spec_ids), "Some specs not found"
73 | with open(ROLLOUT_FILE) as data_file:
74 | rollout_dict = json.load(data_file)
75 | modified = False
76 | for spec in environments:
77 | if not overwrite and spec.id in rollout_dict:
78 | logger.debug("Rollout already exists for {}. Skipping.".format(spec.id))
79 | else:
80 | modified = update_rollout_dict(spec, rollout_dict) or modified
81 |
82 | if modified:
83 | logger.info("Writing new rollout file to {}".format(ROLLOUT_FILE))
84 | with open(ROLLOUT_FILE, "w") as outfile:
85 | json.dump(rollout_dict, outfile, indent=2, sort_keys=True)
86 | else:
87 | logger.info("No modifications needed.")
88 |
89 | if __name__ == '__main__':
90 | parser = argparse.ArgumentParser()
91 | parser.add_argument('-f', '--force', action='store_true', help='Overwrite '+
92 | 'existing rollouts if hashes differ.')
93 | parser.add_argument('-v', '--verbose', action='store_true')
94 | parser.add_argument('specs', nargs='*', help='ids of env specs to check (default: all)')
95 | args = parser.parse_args()
96 | if args.verbose:
97 | logger.set_level(logger.INFO)
98 | add_new_rollouts(args.specs, args.force)
99 |
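The script is normally run from the command line, for example "python scripts/generate_json.py -v" to add missing rollouts, or with "-f" plus env ids to force-regenerate specific entries. Below is a minimal sketch (not part of the script) of computing the same hashes for one registered spec via the helper it imports; the env id is an illustrative choice.

    from gym import envs
    from gym.envs.tests.test_envs_semantics import generate_rollout_hash

    spec = envs.registry.spec('FrozenLake-v0')  # illustrative env id
    observations, actions, rewards, dones = generate_rollout_hash(spec)
    print(observations, actions, rewards, dones)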
--------------------------------------------------------------------------------
/gym/envs/toy_text/cliffwalking.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys
3 | from gym.envs.toy_text import discrete
4 |
5 | UP = 0
6 | RIGHT = 1
7 | DOWN = 2
8 | LEFT = 3
9 |
10 |
11 | class CliffWalkingEnv(discrete.DiscreteEnv):
12 | """
13 | This is a simple implementation of the Gridworld Cliff
14 | reinforcement learning task.
15 |
16 | Adapted from Example 6.6 (page 132) from Reinforcement Learning: An Introduction
17 | by Sutton and Barto:
18 | http://incompleteideas.net/book/the-book-2nd.html
19 |
20 | With inspiration from:
21 | https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/cliff_walking.py
22 |
23 | The board is a 4x12 matrix, with (using Numpy matrix indexing):
24 | [3, 0] as the start at bottom-left
25 | [3, 11] as the goal at bottom-right
26 | [3, 1..10] as the cliff at bottom-center
27 |
28 | Each time step incurs -1 reward, and stepping into the cliff incurs -100 reward
29 | and a reset to the start. An episode terminates when the agent reaches the goal.
30 | """
31 | metadata = {'render.modes': ['human', 'ansi']}
32 |
33 | def __init__(self):
34 | self.shape = (4, 12)
35 | self.start_state_index = np.ravel_multi_index((3, 0), self.shape)
36 |
37 | nS = np.prod(self.shape)
38 | nA = 4
39 |
40 | # Cliff Location
41 | self._cliff = np.zeros(self.shape, dtype=np.bool)
42 | self._cliff[3, 1:-1] = True
43 |
44 | # Calculate transition probabilities and rewards
45 | P = {}
46 | for s in range(nS):
47 | position = np.unravel_index(s, self.shape)
48 | P[s] = {a: [] for a in range(nA)}
49 | P[s][UP] = self._calculate_transition_prob(position, [-1, 0])
50 | P[s][RIGHT] = self._calculate_transition_prob(position, [0, 1])
51 | P[s][DOWN] = self._calculate_transition_prob(position, [1, 0])
52 | P[s][LEFT] = self._calculate_transition_prob(position, [0, -1])
53 |
54 | # Calculate initial state distribution
55 | # We always start in state (3, 0)
56 | isd = np.zeros(nS)
57 | isd[self.start_state_index] = 1.0
58 |
59 | super(CliffWalkingEnv, self).__init__(nS, nA, P, isd)
60 |
61 | def _limit_coordinates(self, coord):
62 | """
63 | Prevent the agent from falling out of the grid world
64 | :param coord: candidate (row, col) position
65 | :return: the position clipped to lie within the grid
66 | """
67 | coord[0] = min(coord[0], self.shape[0] - 1)
68 | coord[0] = max(coord[0], 0)
69 | coord[1] = min(coord[1], self.shape[1] - 1)
70 | coord[1] = max(coord[1], 0)
71 | return coord
72 |
73 | def _calculate_transition_prob(self, current, delta):
74 | """
75 | Determine the outcome for an action. Transition Prob is always 1.0.
76 | :param current: Current position on the grid as (row, col)
77 | :param delta: Change in position for transition
78 | :return: (1.0, new_state, reward, done)
79 | """
80 | new_position = np.array(current) + np.array(delta)
81 | new_position = self._limit_coordinates(new_position).astype(int)
82 | new_state = np.ravel_multi_index(tuple(new_position), self.shape)
83 | if self._cliff[tuple(new_position)]:
84 | return [(1.0, self.start_state_index, -100, False)]
85 |
86 | terminal_state = (self.shape[0] - 1, self.shape[1] - 1)
87 | is_done = tuple(new_position) == terminal_state
88 | return [(1.0, new_state, -1, is_done)]
89 |
90 | def render(self, mode='human'):
91 | outfile = sys.stdout
92 |
93 | for s in range(self.nS):
94 | position = np.unravel_index(s, self.shape)
95 | if self.s == s:
96 | output = " x "
97 | # Print terminal state
98 | elif position == (3, 11):
99 | output = " T "
100 | elif self._cliff[position]:
101 | output = " C "
102 | else:
103 | output = " o "
104 |
105 | if position[1] == 0:
106 | output = output.lstrip()
107 | if position[1] == self.shape[1] - 1:
108 | output = output.rstrip()
109 | output += '\n'
110 |
111 | outfile.write(output)
112 | outfile.write('\n')
113 |
114 |
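A minimal usage sketch (not part of the file), assuming the environment is registered as 'CliffWalking-v0' in gym's registry: stepping RIGHT from the start lands on the cliff, so the agent receives -100 and is returned to the start state (index 36).

    import gym

    env = gym.make('CliffWalking-v0')
    obs = env.reset()                       # 36 == np.ravel_multi_index((3, 0), (4, 12))
    obs, reward, done, info = env.step(1)   # RIGHT steps onto the cliff
    print(obs, reward, done)                # -> 36 -100 False
    env.render()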
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/walker2d.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/gym/envs/toy_text/blackjack.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import spaces
3 | from gym.utils import seeding
4 |
5 | def cmp(a, b):
6 | return float(a > b) - float(a < b)
7 |
8 | # 1 = Ace, 2-10 = Number cards, Jack/Queen/King = 10
9 | deck = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10]
10 |
11 |
12 | def draw_card(np_random):
13 | return int(np_random.choice(deck))
14 |
15 |
16 | def draw_hand(np_random):
17 | return [draw_card(np_random), draw_card(np_random)]
18 |
19 |
20 | def usable_ace(hand): # Does this hand have a usable ace?
21 | return 1 in hand and sum(hand) + 10 <= 21
22 |
23 |
24 | def sum_hand(hand): # Return current hand total
25 | if usable_ace(hand):
26 | return sum(hand) + 10
27 | return sum(hand)
28 |
29 |
30 | def is_bust(hand): # Is this hand a bust?
31 | return sum_hand(hand) > 21
32 |
33 |
34 | def score(hand): # What is the score of this hand (0 if bust)
35 | return 0 if is_bust(hand) else sum_hand(hand)
36 |
37 |
38 | def is_natural(hand): # Is this hand a natural blackjack?
39 | return sorted(hand) == [1, 10]
40 |
41 |
42 | class BlackjackEnv(gym.Env):
43 | """Simple blackjack environment
44 |
45 | Blackjack is a card game where the goal is to obtain cards that sum to as
46 | near as possible to 21 without going over. The player plays against a fixed
47 | dealer.
48 | Face cards (Jack, Queen, King) have point value 10.
49 | An ace can count as either 11 or 1, and it is called 'usable' when counted as 11.
50 | This game is played with an infinite deck (cards are drawn with replacement).
51 | The game starts with each (player and dealer) having one face up and one
52 | face down card.
53 |
54 | The player can request additional cards (hit=1) until they decide to stop
55 | (stick=0) or exceed 21 (bust).
56 |
57 | After the player sticks, the dealer reveals their facedown card, and draws
58 | until their sum is 17 or greater. If the dealer goes bust the player wins.
59 |
60 | If neither player nor dealer busts, the outcome (win, lose, draw) is
61 | decided by whose sum is closer to 21. The reward for winning is +1,
62 | drawing is 0, and losing is -1.
63 |
64 | The observation is a 3-tuple of: the player's current sum,
65 | the dealer's one showing card (1-10 where 1 is ace),
66 | and whether or not the player holds a usable ace (0 or 1).
67 |
68 | This environment corresponds to the version of the blackjack problem
69 | described in Example 5.1 in Reinforcement Learning: An Introduction
70 | by Sutton and Barto.
71 | http://incompleteideas.net/book/the-book-2nd.html
72 | """
73 | def __init__(self, natural=False):
74 | self.action_space = spaces.Discrete(2)
75 | self.observation_space = spaces.Tuple((
76 | spaces.Discrete(32),
77 | spaces.Discrete(11),
78 | spaces.Discrete(2)))
79 | self.seed()
80 |
81 | # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
82 | # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
83 | self.natural = natural
84 | # Start the first game
85 | self.reset()
86 |
87 | def seed(self, seed=None):
88 | self.np_random, seed = seeding.np_random(seed)
89 | return [seed]
90 |
91 | def step(self, action):
92 | assert self.action_space.contains(action)
93 | if action: # hit: add a card to players hand and return
94 | self.player.append(draw_card(self.np_random))
95 | if is_bust(self.player):
96 | done = True
97 | reward = -1
98 | else:
99 | done = False
100 | reward = 0
101 | else: # stick: play out the dealers hand, and score
102 | done = True
103 | while sum_hand(self.dealer) < 17:
104 | self.dealer.append(draw_card(self.np_random))
105 | reward = cmp(score(self.player), score(self.dealer))
106 | if self.natural and is_natural(self.player) and reward == 1:
107 | reward = 1.5
108 | return self._get_obs(), reward, done, {}
109 |
110 | def _get_obs(self):
111 | return (sum_hand(self.player), self.dealer[0], usable_ace(self.player))
112 |
113 | def reset(self):
114 | self.dealer = draw_hand(self.np_random)
115 | self.player = draw_hand(self.np_random)
116 | return self._get_obs()
117 |
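A minimal usage sketch (not part of the file), assuming the standard registered id 'Blackjack-v0': play one hand with a simple threshold policy.

    import gym

    env = gym.make('Blackjack-v0')
    obs = env.reset()                        # (player_sum, dealer_showing, usable_ace)
    done = False
    while not done:
        action = 1 if obs[0] < 17 else 0     # hit below 17, otherwise stick
        obs, reward, done, info = env.step(action)
    print(obs, reward)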
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/shared.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------