├── .gitignore ├── LICENSE ├── README.md ├── envs ├── gym │ ├── .github │ │ ├── ISSUE_TEMPLATE │ │ │ ├── bug.md │ │ │ ├── proposal.md │ │ │ └── question.md │ │ ├── stale.yml │ │ └── workflows │ │ │ ├── build.yml │ │ │ ├── lint_python.yml │ │ │ └── pre-commit.yml │ ├── .gitignore │ ├── .pre-commit-config.yaml │ ├── CODE_OF_CONDUCT.rst │ ├── CONTRIBUTING.md │ ├── LICENSE.md │ ├── README.md │ ├── bin │ │ └── docker_entrypoint │ ├── docs │ │ ├── api.md │ │ ├── creating_environments.md │ │ ├── third_party_environments.md │ │ ├── toy_text │ │ │ ├── blackjack.md │ │ │ ├── frozen_lake.md │ │ │ └── taxi.md │ │ ├── tutorials.md │ │ └── wrappers.md │ ├── gym │ │ ├── __init__.py │ │ ├── core.py │ │ ├── envs │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── box2d │ │ │ │ ├── __init__.py │ │ │ │ ├── bipedal_walker.py │ │ │ │ ├── car_dynamics.py │ │ │ │ ├── car_racing.py │ │ │ │ ├── lunar_lander.py │ │ │ │ └── parking_garage │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── bus.py │ │ │ │ │ ├── race_car.py │ │ │ │ │ ├── street_car.py │ │ │ │ │ └── trike.py │ │ │ ├── classic_control │ │ │ │ ├── __init__.py │ │ │ │ ├── acrobot.py │ │ │ │ ├── assets │ │ │ │ │ └── clockwise.png │ │ │ │ ├── cartpole.py │ │ │ │ ├── continuous_mountain_car.py │ │ │ │ ├── mountain_car.py │ │ │ │ ├── pendulum.py │ │ │ │ └── rendering.py │ │ │ ├── mujoco │ │ │ │ ├── __init__.py │ │ │ │ ├── ant.py │ │ │ │ ├── ant_v3.py │ │ │ │ ├── assets │ │ │ │ │ ├── ant.xml │ │ │ │ │ ├── half_cheetah.xml │ │ │ │ │ ├── hopper.xml │ │ │ │ │ ├── humanoid.xml │ │ │ │ │ ├── humanoidstandup.xml │ │ │ │ │ ├── inverted_double_pendulum.xml │ │ │ │ │ ├── inverted_pendulum.xml │ │ │ │ │ ├── point.xml │ │ │ │ │ ├── pusher.xml │ │ │ │ │ ├── reacher.xml │ │ │ │ │ ├── striker.xml │ │ │ │ │ ├── swimmer.xml │ │ │ │ │ ├── thrower.xml │ │ │ │ │ └── walker2d.xml │ │ │ │ ├── half_cheetah.py │ │ │ │ ├── half_cheetah_v3.py │ │ │ │ ├── hopper.py │ │ │ │ ├── hopper_v3.py │ │ │ │ ├── humanoid.py │ │ │ │ ├── humanoid_v3.py │ │ │ │ ├── humanoidstandup.py │ │ │ │ ├── inverted_double_pendulum.py │ │ │ │ ├── inverted_pendulum.py │ │ │ │ ├── mujoco_env.py │ │ │ │ ├── pusher.py │ │ │ │ ├── reacher.py │ │ │ │ ├── striker.py │ │ │ │ ├── swimmer.py │ │ │ │ ├── swimmer_v3.py │ │ │ │ ├── thrower.py │ │ │ │ ├── walker2d.py │ │ │ │ └── walker2d_v3.py │ │ │ ├── registration.py │ │ │ ├── robotics │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── assets │ │ │ │ │ ├── LICENSE.md │ │ │ │ │ ├── fetch │ │ │ │ │ │ ├── pick_and_place.xml │ │ │ │ │ │ ├── push.xml │ │ │ │ │ │ ├── reach.xml │ │ │ │ │ │ ├── robot.xml │ │ │ │ │ │ ├── shared.xml │ │ │ │ │ │ └── slide.xml │ │ │ │ │ ├── hand │ │ │ │ │ │ ├── manipulate_block.xml │ │ │ │ │ │ ├── manipulate_block_touch_sensors.xml │ │ │ │ │ │ ├── manipulate_egg.xml │ │ │ │ │ │ ├── manipulate_egg_touch_sensors.xml │ │ │ │ │ │ ├── manipulate_pen.xml │ │ │ │ │ │ ├── manipulate_pen_touch_sensors.xml │ │ │ │ │ │ ├── reach.xml │ │ │ │ │ │ ├── robot.xml │ │ │ │ │ │ ├── robot_touch_sensors_92.xml │ │ │ │ │ │ ├── shared.xml │ │ │ │ │ │ ├── shared_asset.xml │ │ │ │ │ │ └── shared_touch_sensors_92.xml │ │ │ │ │ ├── stls │ │ │ │ │ │ ├── .get │ │ │ │ │ │ ├── fetch │ │ │ │ │ │ │ ├── base_link_collision.stl │ │ │ │ │ │ │ ├── bellows_link_collision.stl │ │ │ │ │ │ │ ├── elbow_flex_link_collision.stl │ │ │ │ │ │ │ ├── estop_link.stl │ │ │ │ │ │ │ ├── forearm_roll_link_collision.stl │ │ │ │ │ │ │ ├── gripper_link.stl │ │ │ │ │ │ │ ├── head_pan_link_collision.stl │ │ │ │ │ │ │ ├── head_tilt_link_collision.stl │ │ │ │ │ │ │ ├── l_wheel_link_collision.stl │ │ │ │ │ │ │ ├── 
laser_link.stl │ │ │ │ │ │ │ ├── r_wheel_link_collision.stl │ │ │ │ │ │ │ ├── shoulder_lift_link_collision.stl │ │ │ │ │ │ │ ├── shoulder_pan_link_collision.stl │ │ │ │ │ │ │ ├── torso_fixed_link.stl │ │ │ │ │ │ │ ├── torso_lift_link_collision.stl │ │ │ │ │ │ │ ├── upperarm_roll_link_collision.stl │ │ │ │ │ │ │ ├── wrist_flex_link_collision.stl │ │ │ │ │ │ │ └── wrist_roll_link_collision.stl │ │ │ │ │ │ └── hand │ │ │ │ │ │ │ ├── F1.stl │ │ │ │ │ │ │ ├── F2.stl │ │ │ │ │ │ │ ├── F3.stl │ │ │ │ │ │ │ ├── TH1_z.stl │ │ │ │ │ │ │ ├── TH2_z.stl │ │ │ │ │ │ │ ├── TH3_z.stl │ │ │ │ │ │ │ ├── forearm_electric.stl │ │ │ │ │ │ │ ├── forearm_electric_cvx.stl │ │ │ │ │ │ │ ├── knuckle.stl │ │ │ │ │ │ │ ├── lfmetacarpal.stl │ │ │ │ │ │ │ ├── palm.stl │ │ │ │ │ │ │ └── wrist.stl │ │ │ │ │ └── textures │ │ │ │ │ │ ├── block.png │ │ │ │ │ │ └── block_hidden.png │ │ │ │ ├── fetch │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── pick_and_place.py │ │ │ │ │ ├── push.py │ │ │ │ │ ├── reach.py │ │ │ │ │ └── slide.py │ │ │ │ ├── fetch_env.py │ │ │ │ ├── hand │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── manipulate.py │ │ │ │ │ ├── manipulate_touch_sensors.py │ │ │ │ │ └── reach.py │ │ │ │ ├── hand_env.py │ │ │ │ ├── robot_env.py │ │ │ │ ├── rotations.py │ │ │ │ └── utils.py │ │ │ ├── toy_text │ │ │ │ ├── __init__.py │ │ │ │ ├── blackjack.py │ │ │ │ ├── cliffwalking.py │ │ │ │ ├── discrete.py │ │ │ │ ├── frozen_lake.py │ │ │ │ └── taxi.py │ │ │ └── unittest │ │ │ │ ├── __init__.py │ │ │ │ ├── cube_crash.py │ │ │ │ └── memorize_digits.py │ │ ├── error.py │ │ ├── logger.py │ │ ├── spaces │ │ │ ├── __init__.py │ │ │ ├── box.py │ │ │ ├── dict.py │ │ │ ├── discrete.py │ │ │ ├── multi_binary.py │ │ │ ├── multi_discrete.py │ │ │ ├── space.py │ │ │ ├── tuple.py │ │ │ └── utils.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── atomic_write.py │ │ │ ├── closer.py │ │ │ ├── colorize.py │ │ │ ├── env_checker.py │ │ │ ├── ezpickle.py │ │ │ ├── json_utils.py │ │ │ ├── play.py │ │ │ └── seeding.py │ │ ├── vector │ │ │ ├── __init__.py │ │ │ ├── async_vector_env.py │ │ │ ├── sync_vector_env.py │ │ │ ├── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── misc.py │ │ │ │ ├── numpy_utils.py │ │ │ │ ├── shared_memory.py │ │ │ │ └── spaces.py │ │ │ └── vector_env.py │ │ ├── version.py │ │ └── wrappers │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── atari_preprocessing.py │ │ │ ├── clip_action.py │ │ │ ├── filter_observation.py │ │ │ ├── flatten_observation.py │ │ │ ├── frame_stack.py │ │ │ ├── gray_scale_observation.py │ │ │ ├── monitor.py │ │ │ ├── monitoring │ │ │ ├── __init__.py │ │ │ ├── stats_recorder.py │ │ │ └── video_recorder.py │ │ │ ├── normalize.py │ │ │ ├── order_enforcing.py │ │ │ ├── pixel_observation.py │ │ │ ├── record_episode_statistics.py │ │ │ ├── record_video.py │ │ │ ├── rescale_action.py │ │ │ ├── resize_observation.py │ │ │ ├── time_aware_observation.py │ │ │ ├── time_limit.py │ │ │ ├── transform_observation.py │ │ │ └── transform_reward.py │ ├── py.Dockerfile │ ├── requirements.txt │ ├── scripts │ │ └── generate_json.py │ ├── setup.py │ ├── test_requirements.txt │ └── tests │ │ ├── __init__.py │ │ ├── envs │ │ ├── __init__.py │ │ ├── robotics │ │ │ ├── __init__.py │ │ │ └── hand │ │ │ │ ├── __init__.py │ │ │ │ ├── test_manipulate.py │ │ │ │ ├── test_manipulate_touch_sensors.py │ │ │ │ └── test_reach.py │ │ ├── rollout.json │ │ ├── spec_list.py │ │ ├── test_atari_legacy_env_specs.py │ │ ├── test_determinism.py │ │ ├── test_envs.py │ │ ├── test_envs_semantics.py │ │ ├── test_frozenlake_dfs.py │ │ ├── test_lunar_lander.py │ │ ├── 
test_mujoco_v2_to_v3_conversion.py │ │ └── test_registration.py │ │ ├── spaces │ │ ├── __init__.py │ │ ├── test_spaces.py │ │ └── test_utils.py │ │ ├── test_core.py │ │ ├── utils │ │ ├── __init__.py │ │ ├── test_atexit.py │ │ ├── test_env_checker.py │ │ └── test_seeding.py │ │ ├── vector │ │ ├── __init__.py │ │ ├── test_async_vector_env.py │ │ ├── test_numpy_utils.py │ │ ├── test_shared_memory.py │ │ ├── test_spaces.py │ │ ├── test_sync_vector_env.py │ │ ├── test_vector_env.py │ │ ├── test_vector_env_wrapper.py │ │ └── utils.py │ │ └── wrappers │ │ ├── __init__.py │ │ ├── flatten_test.py │ │ ├── monitoring │ │ ├── __init__.py │ │ ├── helpers.py │ │ └── test_video_recorder.py │ │ ├── nested_dict_test.py │ │ ├── test_atari_preprocessing.py │ │ ├── test_clip_action.py │ │ ├── test_filter_observation.py │ │ ├── test_flatten_observation.py │ │ ├── test_frame_stack.py │ │ ├── test_gray_scale_observation.py │ │ ├── test_normalize.py │ │ ├── test_pixel_observation.py │ │ ├── test_record_episode_statistics.py │ │ ├── test_record_video.py │ │ ├── test_rescale_action.py │ │ ├── test_resize_observation.py │ │ ├── test_time_aware_observation.py │ │ ├── test_transform_observation.py │ │ └── test_transform_reward.py └── mujoco-maze │ ├── .github │ └── workflows │ │ └── main.yml │ ├── .gitignore │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.md │ ├── mujoco_maze │ ├── __init__.py │ ├── agent_model.py │ ├── ant.py │ ├── assets │ │ ├── ant.xml │ │ ├── point.xml │ │ ├── reacher.xml │ │ └── swimmer.xml │ ├── maze_env.py │ ├── maze_env_utils.py │ ├── maze_task.py │ ├── point.py │ ├── reacher.py │ ├── swimmer.py │ └── websock_viewer.py │ ├── poetry.lock │ ├── pyproject.toml │ ├── screenshots │ ├── AntFall.png │ ├── AntPush.png │ ├── Point4Rooms.png │ ├── PointBilliard.png │ ├── PointCorridor.png │ ├── PointUMaze.png │ ├── SwimmerSquareRoom.png │ └── WebVis.png │ ├── setup.cfg │ ├── setup.py │ └── tests │ ├── test_envs.py │ └── test_intersect.py ├── helpers ├── __init__.py ├── bary_utils.py ├── custom_callback.py ├── monitor.py ├── utils.py └── w_encode.py ├── requirements.txt └── run_maze_continuous.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Peide Huang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GRADIENT: Curriculum Reinforcement Learning using Optimal Transport via Gradual Domain Adaptation 2 | 3 | ## Paper 4 | Huang P, Xu M, Zhu J, Shi L, Fang F, Zhao D. 
Curriculum reinforcement learning using optimal transport via gradual domain adaptation. Advances in Neural Information Processing Systems. 2022 Dec 6;35:10656-70. https://arxiv.org/abs/2210.10195 5 | 6 | ## Install dependencies 7 | ``` 8 | conda create --name gradient python=3.8.12 9 | pip install -r requirements.txt 10 | ``` 11 | ``` 12 | cd envs/gym && pip install -e . 13 | ``` 14 | ``` 15 | cd envs/mujoco-maze && pip install -e . 16 | ``` 17 | 18 | ## Environments: 19 | - Environments are modified from Mujoco_maze (https://github.com/kngwyu/mujoco-maze) and gym (https://github.com/openai/gym). 20 | 21 | ## Code Usage 22 | ``` 23 | python run_maze_continuous.py --curriculum gradient --interp_metric encoding --num_stage 5 --reward_threshold 0.5 24 | python run_maze_continuous.py --curriculum gradient --interp_metric l2 --num_stage 5 --reward_threshold 0.5 25 | ``` -------------------------------------------------------------------------------- /envs/gym/.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Submit a bug report 4 | title: "[Bug Report] Bug title" 5 | 6 | --- 7 | 8 | If you are submitting a bug report, please fill in the following details and use the tag [bug]. 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Code example** 14 | Please try to provide a minimal example to reproduce the bug. Error messages and stack traces are also helpful. 15 | 16 | **System Info** 17 | Describe the characteristic of your environment: 18 | * Describe how Gym was installed (pip, docker, source, ...) 19 | * What OS/version of Linux you're using. Note that while we will accept PRs to improve Window's support, we do not officially support it. 20 | * Python version 21 | 22 | **Additional context** 23 | Add any other context about the problem here. 24 | 25 | ### Checklist 26 | 27 | - [ ] I have checked that there is no similar [issue](https://github.com/openai/gym/issues) in the repo (**required**) 28 | -------------------------------------------------------------------------------- /envs/gym/.github/ISSUE_TEMPLATE/proposal.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Proposal 3 | about: Propose changes that are not fixes bugs 4 | title: "[Proposal] Proposal title" 5 | --- 6 | 7 | 8 | 9 | ### Proposal 10 | 11 | A clear and concise description of the proposal. 12 | 13 | ### Motivation 14 | 15 | Please outline the motivation for the proposal. 16 | Is your feature request related to a problem? e.g.,"I'm always frustrated when [...]". 17 | If this is related to another GitHub issue, please link here too. 18 | 19 | ### Pitch 20 | 21 | A clear and concise description of what you want to happen. 22 | 23 | ### Alternatives 24 | 25 | A clear and concise description of any alternative solutions or features you've considered, if any. 26 | 27 | ### Additional context 28 | 29 | Add any other context or screenshots about the feature request here. 
30 | 31 | ### Checklist 32 | 33 | - [ ] I have checked that there is no similar [issue](https://github.com/openai/gym/issues) in the repo (**required**) 34 | -------------------------------------------------------------------------------- /envs/gym/.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: Ask a question 4 | title: "[Question] Question title" 5 | --- 6 | 7 | 8 | ### Question 9 | 10 | If you're a beginner and have basic questions, please ask on [r/reinforcementlearning](https://www.reddit.com/r/reinforcementlearning/) or in the [RL Discord](https://discord.com/invite/xhfNqQv) (if you're new please use the beginners channel). Basic questions that are not bugs or feature requests will be closed without reply, because GitHub issues are not an appropriate venue for these. 11 | 12 | Advanced/nontrivial questions, especially in areas where documentation is lacking, are very much welcome. 13 | -------------------------------------------------------------------------------- /envs/gym/.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Configuration for probot-stale - https://github.com/probot/stale 2 | 3 | # Number of days of inactivity before an Issue or Pull Request becomes stale 4 | daysUntilStale: 60 5 | 6 | # Number of days of inactivity before an Issue or Pull Request with the stale label is closed. 7 | # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. 8 | daysUntilClose: 14 9 | 10 | # Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled) 11 | onlyLabels: 12 | - more-information-needed 13 | 14 | # Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable 15 | exemptLabels: 16 | - pinned 17 | - security 18 | - "[Status] Maybe Later" 19 | 20 | # Set to true to ignore issues in a project (defaults to false) 21 | exemptProjects: true 22 | 23 | # Set to true to ignore issues in a milestone (defaults to false) 24 | exemptMilestones: true 25 | 26 | # Set to true to ignore issues with an assignee (defaults to false) 27 | exemptAssignees: true 28 | 29 | # Label to use when marking as stale 30 | staleLabel: stale 31 | 32 | # Comment to post when marking as stale. Set to `false` to disable 33 | markComment: > 34 | This issue has been automatically marked as stale because it has not had 35 | recent activity. It will be closed if no further activity occurs. Thank you 36 | for your contributions. 37 | 38 | # Comment to post when removing the stale label. 39 | # unmarkComment: > 40 | # Your comment here. 41 | 42 | # Comment to post when closing a stale Issue or Pull Request. 43 | # closeComment: > 44 | # Your comment here. 45 | 46 | # Limit the number of actions per hour, from 1-30. Default is 30 47 | limitPerRun: 30 48 | 49 | # Limit to only `issues` or `pulls` 50 | only: issues 51 | 52 | # Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': 53 | # pulls: 54 | # daysUntilStale: 30 55 | # markComment: > 56 | # This pull request has been automatically marked as stale because it has not had 57 | # recent activity. It will be closed if no further activity occurs. Thank you 58 | # for your contributions. 
59 | 60 | # issues: 61 | # exemptLabels: 62 | # - confirmed -------------------------------------------------------------------------------- /envs/gym/.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: [pull_request, push] 3 | 4 | jobs: 5 | build: 6 | runs-on: ubuntu-latest 7 | strategy: 8 | matrix: 9 | python-version: [3.7, 3.8, 3.9] 10 | steps: 11 | - uses: actions/checkout@v2 12 | - run: | 13 | docker build -f py.Dockerfile \ 14 | --build-arg MUJOCO_KEY=$MUJOCO_KEY \ 15 | --build-arg PYTHON_VERSION=${{ matrix.python-version }} \ 16 | --tag gym-docker . 17 | - name: Run tests 18 | run: docker run gym-docker pytest --forked --import-mode=append 19 | -------------------------------------------------------------------------------- /envs/gym/.github/workflows/lint_python.yml: -------------------------------------------------------------------------------- 1 | name: lint_python 2 | on: [pull_request, push] 3 | jobs: 4 | lint_python: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v2 8 | - uses: actions/setup-python@v2 9 | - run: pip install isort mypy pytest pyupgrade safety 10 | - run: isort --check-only --profile black . || true 11 | - run: pip install -e .[nomujoco] 12 | - run: mypy --install-types --non-interactive . || true 13 | - run: pytest . || true 14 | - run: pytest --doctest-modules . || true 15 | - run: shopt -s globstar && pyupgrade --py36-plus **/*.py || true 16 | -------------------------------------------------------------------------------- /envs/gym/.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | # https://pre-commit.com 2 | # This GitHub Action assumes that the repo contains a valid .pre-commit-config.yaml file. 3 | name: pre-commit 4 | on: 5 | pull_request: 6 | push: 7 | branches: [master] 8 | jobs: 9 | pre-commit: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - uses: actions/setup-python@v2 14 | - run: pip install pre-commit 15 | - run: pre-commit --version 16 | - run: pre-commit install 17 | - run: pre-commit run --all-files 18 | -------------------------------------------------------------------------------- /envs/gym/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | *.py~ 4 | .DS_Store 5 | .cache 6 | .pytest_cache/ 7 | 8 | # Setuptools distribution and build folders. 9 | /dist/ 10 | /build 11 | 12 | # Virtualenv 13 | /env 14 | 15 | # Python egg metadata, regenerated from source files by setuptools. 16 | /*.egg-info 17 | 18 | *.sublime-project 19 | *.sublime-workspace 20 | 21 | logs/ 22 | 23 | .ipynb_checkpoints 24 | ghostdriver.log 25 | 26 | junk 27 | MUJOCO_LOG.txt 28 | 29 | rllab_mujoco 30 | 31 | tutorial/*.html 32 | 33 | # IDE files 34 | .eggs 35 | .tox 36 | 37 | # PyCharm project files 38 | .idea 39 | vizdoom.ini 40 | -------------------------------------------------------------------------------- /envs/gym/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/PyCQA/bandit/ 3 | rev: 1.7.0 4 | hooks: 5 | - id: bandit 6 | args: 7 | - --recursive 8 | - --skip 9 | - B101,B108,B301,B403,B404,B603 10 | - . 
11 | - repo: https://github.com/python/black 12 | rev: 21.7b0 13 | hooks: 14 | - id: black 15 | - repo: https://github.com/codespell-project/codespell 16 | rev: v2.1.0 17 | hooks: 18 | - id: codespell 19 | args: 20 | - --ignore-words-list=nd,reacher,thist,ths 21 | - repo: https://gitlab.com/pycqa/flake8 22 | rev: 3.9.2 23 | hooks: 24 | - id: flake8 25 | args: 26 | - --ignore=E203,E402,E712,E722,E731,E741,F401,F403,F405,F524,F841,W503 27 | - --max-complexity=30 28 | - --max-line-length=456 29 | - --show-source 30 | - --statistics 31 | -------------------------------------------------------------------------------- /envs/gym/CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | OpenAI Gym is dedicated to providing a harassment-free experience for 2 | everyone, regardless of gender, gender identity and expression, sexual 3 | orientation, disability, physical appearance, body size, age, race, or 4 | religion. We do not tolerate harassment of participants in any form. 5 | 6 | This code of conduct applies to all OpenAI Gym spaces (including Gist 7 | comments) both online and off. Anyone who violates this code of 8 | conduct may be sanctioned or expelled from these spaces at the 9 | discretion of the OpenAI team. 10 | 11 | We may add additional rules over time, which will be made clearly 12 | available to participants. Participants are responsible for knowing 13 | and abiding by these rules. 14 | -------------------------------------------------------------------------------- /envs/gym/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Gym Contribution Guidelines 2 | 3 | At this time we are currently accepting the current forms of contributions: 4 | 5 | - Bug reports (keep in mind that changing environment behavior should be minimized as that requires releasing a new version of the environment and makes results hard to compare across versions) 6 | - Pull requests for bug fixes 7 | - Documentation improvements 8 | 9 | Notably, we are not accepting these forms of contributions: 10 | 11 | - New environments 12 | - New features 13 | 14 | This may change in the future. 15 | If you wish to make a Gym environment, follow the instructions in [Creating Environments](https://github.com/openai/gym/blob/master/docs/creating-environments.md). When your environment works, you can make a PR to add it to the bottom of the [List of Environments](https://github.com/openai/gym/blob/master/docs/environments.md). 16 | 17 | 18 | Edit July 27, 2021: Please see https://github.com/openai/gym/issues/2259 for new contributing standards 19 | -------------------------------------------------------------------------------- /envs/gym/LICENSE.md: -------------------------------------------------------------------------------- 1 | # gym 2 | 3 | The MIT License 4 | 5 | Copyright (c) 2016 OpenAI (https://openai.com) 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | 25 | # Mujoco models 26 | This work is derived from [MuJuCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license: 27 | ``` 28 | This file is part of MuJoCo. 29 | Copyright 2009-2015 Roboti LLC. 30 | Mujoco :: Advanced physics simulation engine 31 | Source : www.roboti.us 32 | Version : 1.31 33 | Released : 23Apr16 34 | Author :: Vikash Kumar 35 | Contacts : kumar@roboti.us 36 | ``` 37 | -------------------------------------------------------------------------------- /envs/gym/bin/docker_entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is the entrypoint for our Docker image. 3 | 4 | set -ex 5 | 6 | # Set up display; otherwise rendering will fail 7 | Xvfb -screen 0 1024x768x24 & 8 | export DISPLAY=:0 9 | 10 | # Wait for the file to come up 11 | display=0 12 | file="/tmp/.X11-unix/X$display" 13 | for i in $(seq 1 10); do 14 | if [ -e "$file" ]; then 15 | break 16 | fi 17 | 18 | echo "Waiting for $file to be created (try $i/10)" 19 | sleep "$i" 20 | done 21 | if ! [ -e "$file" ]; then 22 | echo "Timing out: $file was not created" 23 | exit 1 24 | fi 25 | 26 | exec "$@" 27 | -------------------------------------------------------------------------------- /envs/gym/docs/creating_environments.md: -------------------------------------------------------------------------------- 1 | # How to create new environments for Gym 2 | 3 | * Create a new repo called gym-foo, which should also be a PIP package. 4 | 5 | * A good example is https://github.com/openai/gym-soccer. 6 | 7 | * It should have at least the following files: 8 | ```sh 9 | gym-foo/ 10 | README.md 11 | setup.py 12 | gym_foo/ 13 | __init__.py 14 | envs/ 15 | __init__.py 16 | foo_env.py 17 | foo_extrahard_env.py 18 | ``` 19 | 20 | * `gym-foo/setup.py` should have: 21 | 22 | ```python 23 | from setuptools import setup 24 | 25 | setup(name='gym_foo', 26 | version='0.0.1', 27 | install_requires=['gym'] # And any other dependencies foo needs 28 | ) 29 | ``` 30 | 31 | * `gym-foo/gym_foo/__init__.py` should have: 32 | ```python 33 | from gym.envs.registration import register 34 | 35 | register( 36 | id='foo-v0', 37 | entry_point='gym_foo.envs:FooEnv', 38 | ) 39 | register( 40 | id='foo-extrahard-v0', 41 | entry_point='gym_foo.envs:FooExtraHardEnv', 42 | ) 43 | ``` 44 | 45 | * `gym-foo/gym_foo/envs/__init__.py` should have: 46 | ```python 47 | from gym_foo.envs.foo_env import FooEnv 48 | from gym_foo.envs.foo_extrahard_env import FooExtraHardEnv 49 | ``` 50 | 51 | * `gym-foo/gym_foo/envs/foo_env.py` should look something like: 52 | ```python 53 | import gym 54 | from gym import error, spaces, utils 55 | from gym.utils import seeding 56 | 57 | class FooEnv(gym.Env): 58 | metadata = {'render.modes': ['human']} 59 | 60 | def __init__(self): 61 | ... 62 | def step(self, action): 63 | ... 64 | def reset(self): 65 | ... 66 | def render(self, mode='human'): 67 | ... 68 | def close(self): 69 | ... 
70 | ``` 71 | 72 | * After you have installed your package with `pip install -e gym-foo`, you can create an instance of the environment with `gym.make('gym_foo:foo-v0')` 73 | -------------------------------------------------------------------------------- /envs/gym/docs/toy_text/blackjack.md: -------------------------------------------------------------------------------- 1 | Blackjack 2 | --- 3 | |Title|Action Type|Action Shape|Action Values|Observation Shape|Observation Values|Average Total Reward|Import| 4 | | ----------- | -----------| ----------- | -----------| ----------- | -----------| ----------- | -----------| 5 | |Blackjack|Discrete|(1,)|(0,1)|(3,)|[(0,31),(0,10),(0,1)]| |from gym.envs.toy_text import blackjack| 6 | --- 7 | 8 | Blackjack is a card game where the goal is to obtain cards that sum to as near as possible to 21 without going over. They're playing against a fixed dealer. 9 | 10 | Card Values: 11 | 12 | - Face cards (Jack, Queen, King) have point value 10. 13 | - Aces can either count as 11 or 1, and it's called 'usable ace' at 11. 14 | - Numerical cards (2-9) have value of their number. 15 | 16 | This game is placed with an infinite deck (or with replacement). 17 | The game starts with dealer having one face up and one face down card, while player having two face up cards. 18 | 19 | The player can request additional cards (hit, action=1) until they decide to stop 20 | (stick, action=0) or exceed 21 (bust). 21 | After the player sticks, the dealer reveals their facedown card, and draws 22 | until their sum is 17 or greater. If the dealer goes bust the player wins. 23 | If neither player nor dealer busts, the outcome (win, lose, draw) is 24 | decided by whose sum is closer to 21. 25 | 26 | The agent take a 1-element vector for actions. 27 | The action space is `(action)`, where: 28 | - `action` is used to decide stick/hit for values (0,1). 29 | 30 | The observation of a 3-tuple of: the players current sum, 31 | the dealer's one showing card (1-10 where 1 is ace), and whether or not the player holds a usable ace (0 or 1). 32 | 33 | This environment corresponds to the version of the blackjack problem 34 | described in Example 5.1 in Reinforcement Learning: An Introduction 35 | by Sutton and Barto. 36 | http://incompleteideas.net/book/the-book-2nd.html 37 | 38 | **Rewards:** 39 | 40 | Reward schedule: 41 | - win game: +1 42 | - lose game: -1 43 | - draw game: 0 44 | - win game with natural blackjack: 45 | 46 | +1.5 (if natural is True.) 47 | 48 | +1 (if natural is False.) 49 | 50 | ### Arguments 51 | 52 | ``` 53 | gym.make('Blackjack-v0', natural=False) 54 | ``` 55 | 56 | `natural`: Whether to give an additional reward for starting with a natural blackjack, i.e. starting with an ace and ten (sum is 21). 57 | 58 | ### Version History 59 | 60 | * v0: Initial versions release (1.0.0) 61 | -------------------------------------------------------------------------------- /envs/gym/docs/toy_text/frozen_lake.md: -------------------------------------------------------------------------------- 1 | Frozen Lake 2 | --- 3 | |Title|Action Type|Action Shape|Action Values|Observation Shape|Observation Values|Average Total Reward|Import| 4 | | ----------- | -----------| ----------- | -----------| ----------- | -----------| ----------- | -----------| 5 | |Frozen Lake|Discrete|(1,)|(0,3)|(1,)|(0,nrows*ncolumns)| |from gym.envs.toy_text import frozen_lake| 6 | --- 7 | 8 | 9 | Frozen lake involves crossing a frozen lake from Start(S) to goal(G) without falling into any holes(H). 
The agent may not always move in the intended direction due to the slippery nature of the frozen lake. 10 | 11 | The agent take a 1-element vector for actions. 12 | The action space is `(dir)`, where `dir` decides direction to move in which can be: 13 | 14 | - 0: LEFT 15 | - 1: DOWN 16 | - 2: RIGHT 17 | - 3: UP 18 | 19 | The observation is a value representing the agents current position as 20 | 21 | current_row * nrows + current_col 22 | 23 | **Rewards:** 24 | 25 | Reward schedule: 26 | - Reach goal(G): +1 27 | - Reach hole(H): 0 28 | 29 | ### Arguments 30 | 31 | ``` 32 | gym.make('FrozenLake-v0', desc=None,map_name="4x4", is_slippery=True) 33 | ``` 34 | 35 | `desc`: Used to specify custom map for frozen lake. For example, 36 | 37 | desc=["SFFF", "FHFH", "FFFH", "HFFG"]. 38 | 39 | `map_name`: ID to use any of the preloaded maps. 40 | 41 | "4x4":[ 42 | "SFFF", 43 | "FHFH", 44 | "FFFH", 45 | "HFFG" 46 | ] 47 | 48 | "8x8": [ 49 | "SFFFFFFF", 50 | "FFFFFFFF", 51 | "FFFHFFFF", 52 | "FFFFFHFF", 53 | "FFFHFFFF", 54 | "FHHFFFHF", 55 | "FHFFHFHF", 56 | "FFFHFFFG", 57 | ] 58 | 59 | 60 | 61 | 62 | `is_slippery`: True/False. If True will move in intended direction with probability of 1/3 else will move in either perpendicular direction with equal probability of 1/3 in both directions. 63 | 64 | For example, if action is left and is_slippery is True, then: 65 | - P(move left)=1/3 66 | - P(move up)=1/3 67 | - P(move down)=1/3 68 | ### Version History 69 | 70 | * v0: Initial versions release (1.0.0) 71 | -------------------------------------------------------------------------------- /envs/gym/docs/tutorials.md: -------------------------------------------------------------------------------- 1 | ## Getting Started With OpenAI Gym: The Basic Building Blocks 2 | 3 | https://blog.paperspace.com/getting-started-with-openai-gym/ 4 | 5 | A good starting point explaining all the basic building blocks of the Gym API. 6 | 7 | 8 | 9 | ## Reinforcement Q-Learning from Scratch in Python with OpenAI Gym 10 | Good Algorithmic Introduction to Reinforcement Learning showcasing how to use Gym API for Training Agents. 11 | 12 | https://www.learndatasci.com/tutorials/reinforcement-q-learning-scratch-python-openai-gym/ 13 | 14 | 15 | ## Tutorial: An Introduction to Reinforcement Learning Using OpenAI Gym 16 | 17 | https://www.gocoder.one/blog/rl-tutorial-with-openai-gym 18 | -------------------------------------------------------------------------------- /envs/gym/gym/__init__.py: -------------------------------------------------------------------------------- 1 | import distutils.version 2 | import os 3 | import sys 4 | import warnings 5 | 6 | from gym import error 7 | from gym.version import VERSION as __version__ 8 | 9 | from gym.core import ( 10 | Env, 11 | GoalEnv, 12 | Wrapper, 13 | ObservationWrapper, 14 | ActionWrapper, 15 | RewardWrapper, 16 | ) 17 | from gym.spaces import Space 18 | from gym.envs import make, spec, register 19 | from gym import logger 20 | from gym import vector 21 | from gym import wrappers 22 | 23 | __all__ = ["Env", "Space", "Wrapper", "make", "spec", "register"] 24 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/README.md: -------------------------------------------------------------------------------- 1 | # Envs 2 | 3 | These are the core integrated environments. Note that we may later 4 | restructure any of the files, but will keep the environments available 5 | at the relevant package's top-level. 
So for example, you should access 6 | `AntEnv` as follows: 7 | 8 | ``` 9 | # Will be supported in future releases 10 | from gym.envs import mujoco 11 | mujoco.AntEnv 12 | ``` 13 | 14 | Rather than: 15 | 16 | ``` 17 | # May break in future releases 18 | from gym.envs.mujoco import ant 19 | ant.AntEnv 20 | ``` -------------------------------------------------------------------------------- /envs/gym/gym/envs/box2d/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | import Box2D 3 | from gym.envs.box2d.lunar_lander import LunarLander 4 | from gym.envs.box2d.lunar_lander import LunarLanderContinuous 5 | from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore 6 | from gym.envs.box2d.car_racing import CarRacing 7 | except ImportError: 8 | Box2D = None 9 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/box2d/parking_garage/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "André Biedenkapp" 2 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/classic_control/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.classic_control.cartpole import CartPoleEnv 2 | from gym.envs.classic_control.mountain_car import MountainCarEnv 3 | from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv 4 | from gym.envs.classic_control.pendulum import PendulumEnv 5 | from gym.envs.classic_control.acrobot import AcrobotEnv 6 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/classic_control/assets/clockwise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/classic_control/assets/clockwise.png -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco.mujoco_env import MujocoEnv 2 | 3 | # ^^^^^ so that user gets the correct error 4 | # message if mujoco is not installed correctly 5 | from gym.envs.mujoco.ant import AntEnv 6 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv 7 | from gym.envs.mujoco.hopper import HopperEnv 8 | from gym.envs.mujoco.walker2d import Walker2dEnv 9 | from gym.envs.mujoco.humanoid import HumanoidEnv 10 | from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv 11 | from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv 12 | from gym.envs.mujoco.reacher import ReacherEnv 13 | from gym.envs.mujoco.swimmer import SwimmerEnv 14 | from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv 15 | from gym.envs.mujoco.pusher import PusherEnv 16 | from gym.envs.mujoco.thrower import ThrowerEnv 17 | from gym.envs.mujoco.striker import StrikerEnv 18 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/ant.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | 6 | class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, "ant.xml", 5) 9 | 
utils.EzPickle.__init__(self) 10 | 11 | def step(self, a): 12 | xposbefore = self.get_body_com("torso")[0] 13 | self.do_simulation(a, self.frame_skip) 14 | xposafter = self.get_body_com("torso")[0] 15 | forward_reward = (xposafter - xposbefore) / self.dt 16 | ctrl_cost = 0.5 * np.square(a).sum() 17 | contact_cost = ( 18 | 0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 1))) 19 | ) 20 | survive_reward = 1.0 21 | reward = forward_reward - ctrl_cost - contact_cost + survive_reward 22 | state = self.state_vector() 23 | notdone = np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0 24 | done = not notdone 25 | ob = self._get_obs() 26 | return ( 27 | ob, 28 | reward, 29 | done, 30 | dict( 31 | reward_forward=forward_reward, 32 | reward_ctrl=-ctrl_cost, 33 | reward_contact=-contact_cost, 34 | reward_survive=survive_reward, 35 | ), 36 | ) 37 | 38 | def _get_obs(self): 39 | return np.concatenate( 40 | [ 41 | self.sim.data.qpos.flat[2:], 42 | self.sim.data.qvel.flat, 43 | np.clip(self.sim.data.cfrc_ext, -1, 1).flat, 44 | ] 45 | ) 46 | 47 | def reset_model(self): 48 | qpos = self.init_qpos + self.np_random.uniform( 49 | size=self.model.nq, low=-0.1, high=0.1 50 | ) 51 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1 52 | self.set_state(qpos, qvel) 53 | return self._get_obs() 54 | 55 | def viewer_setup(self): 56 | self.viewer.cam.distance = self.model.stat.extent * 0.5 57 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/assets/inverted_double_pendulum.xml: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/assets/inverted_pendulum.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/assets/point.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 32 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/assets/reacher.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/assets/swimmer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 40 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/half_cheetah.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | 6 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, "half_cheetah.xml", 5) 9 | utils.EzPickle.__init__(self) 10 | 11 | def step(self, action): 12 | xposbefore = self.sim.data.qpos[0] 13 | self.do_simulation(action, self.frame_skip) 14 | xposafter = self.sim.data.qpos[0] 15 | ob = self._get_obs() 16 | reward_ctrl = -0.1 * np.square(action).sum() 17 | reward_run = (xposafter - xposbefore) / self.dt 18 | reward = reward_ctrl + reward_run 19 | done = False 20 | return ob, reward, done, dict(reward_run=reward_run, 
reward_ctrl=reward_ctrl) 21 | 22 | def _get_obs(self): 23 | return np.concatenate( 24 | [ 25 | self.sim.data.qpos.flat[1:], 26 | self.sim.data.qvel.flat, 27 | ] 28 | ) 29 | 30 | def reset_model(self): 31 | qpos = self.init_qpos + self.np_random.uniform( 32 | low=-0.1, high=0.1, size=self.model.nq 33 | ) 34 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1 35 | self.set_state(qpos, qvel) 36 | return self._get_obs() 37 | 38 | def viewer_setup(self): 39 | self.viewer.cam.distance = self.model.stat.extent * 0.5 40 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/hopper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | 6 | class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, "hopper.xml", 4) 9 | utils.EzPickle.__init__(self) 10 | 11 | def step(self, a): 12 | posbefore = self.sim.data.qpos[0] 13 | self.do_simulation(a, self.frame_skip) 14 | posafter, height, ang = self.sim.data.qpos[0:3] 15 | alive_bonus = 1.0 16 | reward = (posafter - posbefore) / self.dt 17 | reward += alive_bonus 18 | reward -= 1e-3 * np.square(a).sum() 19 | s = self.state_vector() 20 | done = not ( 21 | np.isfinite(s).all() 22 | and (np.abs(s[2:]) < 100).all() 23 | and (height > 0.7) 24 | and (abs(ang) < 0.2) 25 | ) 26 | ob = self._get_obs() 27 | return ob, reward, done, {} 28 | 29 | def _get_obs(self): 30 | return np.concatenate( 31 | [self.sim.data.qpos.flat[1:], np.clip(self.sim.data.qvel.flat, -10, 10)] 32 | ) 33 | 34 | def reset_model(self): 35 | qpos = self.init_qpos + self.np_random.uniform( 36 | low=-0.005, high=0.005, size=self.model.nq 37 | ) 38 | qvel = self.init_qvel + self.np_random.uniform( 39 | low=-0.005, high=0.005, size=self.model.nv 40 | ) 41 | self.set_state(qpos, qvel) 42 | return self._get_obs() 43 | 44 | def viewer_setup(self): 45 | self.viewer.cam.trackbodyid = 2 46 | self.viewer.cam.distance = self.model.stat.extent * 0.75 47 | self.viewer.cam.lookat[2] = 1.15 48 | self.viewer.cam.elevation = -20 49 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/humanoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym.envs.mujoco import mujoco_env 3 | from gym import utils 4 | 5 | 6 | def mass_center(model, sim): 7 | mass = np.expand_dims(model.body_mass, 1) 8 | xpos = sim.data.xipos 9 | return (np.sum(mass * xpos, 0) / np.sum(mass))[0] 10 | 11 | 12 | class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle): 13 | def __init__(self): 14 | mujoco_env.MujocoEnv.__init__(self, "humanoid.xml", 5) 15 | utils.EzPickle.__init__(self) 16 | 17 | def _get_obs(self): 18 | data = self.sim.data 19 | return np.concatenate( 20 | [ 21 | data.qpos.flat[2:], 22 | data.qvel.flat, 23 | data.cinert.flat, 24 | data.cvel.flat, 25 | data.qfrc_actuator.flat, 26 | data.cfrc_ext.flat, 27 | ] 28 | ) 29 | 30 | def step(self, a): 31 | pos_before = mass_center(self.model, self.sim) 32 | self.do_simulation(a, self.frame_skip) 33 | pos_after = mass_center(self.model, self.sim) 34 | alive_bonus = 5.0 35 | data = self.sim.data 36 | lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt 37 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum() 38 | quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum() 39 | quad_impact_cost = 
min(quad_impact_cost, 10) 40 | reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus 41 | qpos = self.sim.data.qpos 42 | done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0)) 43 | return ( 44 | self._get_obs(), 45 | reward, 46 | done, 47 | dict( 48 | reward_linvel=lin_vel_cost, 49 | reward_quadctrl=-quad_ctrl_cost, 50 | reward_alive=alive_bonus, 51 | reward_impact=-quad_impact_cost, 52 | ), 53 | ) 54 | 55 | def reset_model(self): 56 | c = 0.01 57 | self.set_state( 58 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq), 59 | self.init_qvel 60 | + self.np_random.uniform( 61 | low=-c, 62 | high=c, 63 | size=self.model.nv, 64 | ), 65 | ) 66 | return self._get_obs() 67 | 68 | def viewer_setup(self): 69 | self.viewer.cam.trackbodyid = 1 70 | self.viewer.cam.distance = self.model.stat.extent * 1.0 71 | self.viewer.cam.lookat[2] = 2.0 72 | self.viewer.cam.elevation = -20 73 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/humanoidstandup.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco import mujoco_env 2 | from gym import utils 3 | import numpy as np 4 | 5 | 6 | class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle): 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, "humanoidstandup.xml", 5) 9 | utils.EzPickle.__init__(self) 10 | 11 | def _get_obs(self): 12 | data = self.sim.data 13 | return np.concatenate( 14 | [ 15 | data.qpos.flat[2:], 16 | data.qvel.flat, 17 | data.cinert.flat, 18 | data.cvel.flat, 19 | data.qfrc_actuator.flat, 20 | data.cfrc_ext.flat, 21 | ] 22 | ) 23 | 24 | def step(self, a): 25 | self.do_simulation(a, self.frame_skip) 26 | pos_after = self.sim.data.qpos[2] 27 | data = self.sim.data 28 | uph_cost = (pos_after - 0) / self.model.opt.timestep 29 | 30 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum() 31 | quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum() 32 | quad_impact_cost = min(quad_impact_cost, 10) 33 | reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1 34 | 35 | done = bool(False) 36 | return ( 37 | self._get_obs(), 38 | reward, 39 | done, 40 | dict( 41 | reward_linup=uph_cost, 42 | reward_quadctrl=-quad_ctrl_cost, 43 | reward_impact=-quad_impact_cost, 44 | ), 45 | ) 46 | 47 | def reset_model(self): 48 | c = 0.01 49 | self.set_state( 50 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq), 51 | self.init_qvel 52 | + self.np_random.uniform( 53 | low=-c, 54 | high=c, 55 | size=self.model.nv, 56 | ), 57 | ) 58 | return self._get_obs() 59 | 60 | def viewer_setup(self): 61 | self.viewer.cam.trackbodyid = 1 62 | self.viewer.cam.distance = self.model.stat.extent * 1.0 63 | self.viewer.cam.lookat[2] = 0.8925 64 | self.viewer.cam.elevation = -20 65 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/inverted_double_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | 6 | class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, "inverted_double_pendulum.xml", 5) 9 | utils.EzPickle.__init__(self) 10 | 11 | def step(self, action): 12 | self.do_simulation(action, self.frame_skip) 13 | ob = self._get_obs() 14 | x, _, y = self.sim.data.site_xpos[0] 15 | dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2 
16 | v1, v2 = self.sim.data.qvel[1:3] 17 | vel_penalty = 1e-3 * v1 ** 2 + 5e-3 * v2 ** 2 18 | alive_bonus = 10 19 | r = alive_bonus - dist_penalty - vel_penalty 20 | done = bool(y <= 1) 21 | return ob, r, done, {} 22 | 23 | def _get_obs(self): 24 | return np.concatenate( 25 | [ 26 | self.sim.data.qpos[:1], # cart x pos 27 | np.sin(self.sim.data.qpos[1:]), # link angles 28 | np.cos(self.sim.data.qpos[1:]), 29 | np.clip(self.sim.data.qvel, -10, 10), 30 | np.clip(self.sim.data.qfrc_constraint, -10, 10), 31 | ] 32 | ).ravel() 33 | 34 | def reset_model(self): 35 | self.set_state( 36 | self.init_qpos 37 | + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq), 38 | self.init_qvel + self.np_random.randn(self.model.nv) * 0.1, 39 | ) 40 | return self._get_obs() 41 | 42 | def viewer_setup(self): 43 | v = self.viewer 44 | v.cam.trackbodyid = 0 45 | v.cam.distance = self.model.stat.extent * 0.5 46 | v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2] 47 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | 6 | class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): 7 | def __init__(self): 8 | utils.EzPickle.__init__(self) 9 | mujoco_env.MujocoEnv.__init__(self, "inverted_pendulum.xml", 2) 10 | 11 | def step(self, a): 12 | reward = 1.0 13 | self.do_simulation(a, self.frame_skip) 14 | ob = self._get_obs() 15 | notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= 0.2) 16 | done = not notdone 17 | return ob, reward, done, {} 18 | 19 | def reset_model(self): 20 | qpos = self.init_qpos + self.np_random.uniform( 21 | size=self.model.nq, low=-0.01, high=0.01 22 | ) 23 | qvel = self.init_qvel + self.np_random.uniform( 24 | size=self.model.nv, low=-0.01, high=0.01 25 | ) 26 | self.set_state(qpos, qvel) 27 | return self._get_obs() 28 | 29 | def _get_obs(self): 30 | return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel() 31 | 32 | def viewer_setup(self): 33 | v = self.viewer 34 | v.cam.trackbodyid = 0 35 | v.cam.distance = self.model.stat.extent 36 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/pusher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | import mujoco_py 6 | 7 | 8 | class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle): 9 | def __init__(self): 10 | utils.EzPickle.__init__(self) 11 | mujoco_env.MujocoEnv.__init__(self, "pusher.xml", 5) 12 | 13 | def step(self, a): 14 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm") 15 | vec_2 = self.get_body_com("object") - self.get_body_com("goal") 16 | 17 | reward_near = -np.linalg.norm(vec_1) 18 | reward_dist = -np.linalg.norm(vec_2) 19 | reward_ctrl = -np.square(a).sum() 20 | reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near 21 | 22 | self.do_simulation(a, self.frame_skip) 23 | ob = self._get_obs() 24 | done = False 25 | return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) 26 | 27 | def viewer_setup(self): 28 | self.viewer.cam.trackbodyid = -1 29 | self.viewer.cam.distance = 4.0 30 | 31 | def reset_model(self): 32 | qpos = self.init_qpos 33 | 34 | self.goal_pos = np.asarray([0, 0]) 35 | while True: 36 | # 
self.cylinder_pos = np.concatenate( 37 | # [ 38 | # self.np_random.uniform(low=-0.3, high=0, size=1), 39 | # self.np_random.uniform(low=-0.2, high=0.2, size=1), 40 | # ] 41 | # ) 42 | random_idx = self.np_random.choice(self.inter_context.shape[0]) 43 | self.cylinder_pos = self.inter_context[random_idx, :] + 0.01*np.random.randn(2, ) 44 | # print(self.cylinder_pos) 45 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17: 46 | break 47 | 48 | qpos[-4:-2] = self.cylinder_pos 49 | qpos[-2:] = self.goal_pos 50 | qvel = self.init_qvel + self.np_random.uniform( 51 | low=-0.005, high=0.005, size=self.model.nv 52 | ) 53 | qvel[-4:] = 0 54 | self.set_state(qpos, qvel) 55 | return self._get_obs() 56 | 57 | def set_context_dist(self, context_dist): 58 | self.inter_context = context_dist 59 | 60 | def _get_obs(self): 61 | return np.concatenate( 62 | [ 63 | self.sim.data.qpos.flat[:7], 64 | self.sim.data.qvel.flat[:7], 65 | self.get_body_com("tips_arm"), 66 | self.get_body_com("object"), 67 | self.get_body_com("goal"), 68 | ] 69 | ) 70 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/striker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | 6 | class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle): 7 | def __init__(self): 8 | utils.EzPickle.__init__(self) 9 | self._striked = False 10 | self._min_strike_dist = np.inf 11 | self.strike_threshold = 0.1 12 | mujoco_env.MujocoEnv.__init__(self, "striker.xml", 5) 13 | 14 | def step(self, a): 15 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm") 16 | vec_2 = self.get_body_com("object") - self.get_body_com("goal") 17 | self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(vec_2)) 18 | 19 | if np.linalg.norm(vec_1) < self.strike_threshold: 20 | self._striked = True 21 | self._strike_pos = self.get_body_com("tips_arm") 22 | 23 | if self._striked: 24 | vec_3 = self.get_body_com("object") - self._strike_pos 25 | reward_near = -np.linalg.norm(vec_3) 26 | else: 27 | reward_near = -np.linalg.norm(vec_1) 28 | 29 | reward_dist = -np.linalg.norm(self._min_strike_dist) 30 | reward_ctrl = -np.square(a).sum() 31 | reward = 3 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near 32 | 33 | self.do_simulation(a, self.frame_skip) 34 | ob = self._get_obs() 35 | done = False 36 | return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) 37 | 38 | def viewer_setup(self): 39 | self.viewer.cam.trackbodyid = 0 40 | self.viewer.cam.distance = 4.0 41 | 42 | def reset_model(self): 43 | self._min_strike_dist = np.inf 44 | self._striked = False 45 | self._strike_pos = None 46 | 47 | qpos = self.init_qpos 48 | 49 | self.ball = np.array([0.5, -0.175]) 50 | while True: 51 | self.goal = np.concatenate( 52 | [ 53 | self.np_random.uniform(low=0.15, high=0.7, size=1), 54 | self.np_random.uniform(low=0.1, high=1.0, size=1), 55 | ] 56 | ) 57 | if np.linalg.norm(self.ball - self.goal) > 0.17: 58 | break 59 | 60 | qpos[-9:-7] = [self.ball[1], self.ball[0]] 61 | qpos[-7:-5] = self.goal 62 | diff = self.ball - self.goal 63 | angle = -np.arctan(diff[0] / (diff[1] + 1e-8)) 64 | qpos[-1] = angle / 3.14 65 | qvel = self.init_qvel + self.np_random.uniform( 66 | low=-0.1, high=0.1, size=self.model.nv 67 | ) 68 | qvel[7:] = 0 69 | self.set_state(qpos, qvel) 70 | return self._get_obs() 71 | 72 | def _get_obs(self): 73 | return np.concatenate( 74 | [ 75 | 
self.sim.data.qpos.flat[:7], 76 | self.sim.data.qvel.flat[:7], 77 | self.get_body_com("tips_arm"), 78 | self.get_body_com("object"), 79 | self.get_body_com("goal"), 80 | ] 81 | ) 82 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/swimmer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | 6 | class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, "swimmer.xml", 4) 9 | utils.EzPickle.__init__(self) 10 | 11 | def step(self, a): 12 | ctrl_cost_coeff = 0.0001 13 | xposbefore = self.sim.data.qpos[0] 14 | self.do_simulation(a, self.frame_skip) 15 | xposafter = self.sim.data.qpos[0] 16 | reward_fwd = (xposafter - xposbefore) / self.dt 17 | reward_ctrl = -ctrl_cost_coeff * np.square(a).sum() 18 | reward = reward_fwd + reward_ctrl 19 | ob = self._get_obs() 20 | return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl) 21 | 22 | def _get_obs(self): 23 | qpos = self.sim.data.qpos 24 | qvel = self.sim.data.qvel 25 | return np.concatenate([qpos.flat[2:], qvel.flat]) 26 | 27 | def reset_model(self): 28 | self.set_state( 29 | self.init_qpos 30 | + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq), 31 | self.init_qvel 32 | + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nv), 33 | ) 34 | return self._get_obs() 35 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/thrower.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | 6 | class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle): 7 | def __init__(self): 8 | utils.EzPickle.__init__(self) 9 | self._ball_hit_ground = False 10 | self._ball_hit_location = None 11 | mujoco_env.MujocoEnv.__init__(self, "thrower.xml", 5) 12 | 13 | def step(self, a): 14 | ball_xy = self.get_body_com("ball")[:2] 15 | goal_xy = self.get_body_com("goal")[:2] 16 | 17 | if not self._ball_hit_ground and self.get_body_com("ball")[2] < -0.25: 18 | self._ball_hit_ground = True 19 | self._ball_hit_location = self.get_body_com("ball") 20 | 21 | if self._ball_hit_ground: 22 | ball_hit_xy = self._ball_hit_location[:2] 23 | reward_dist = -np.linalg.norm(ball_hit_xy - goal_xy) 24 | else: 25 | reward_dist = -np.linalg.norm(ball_xy - goal_xy) 26 | reward_ctrl = -np.square(a).sum() 27 | 28 | reward = reward_dist + 0.002 * reward_ctrl 29 | self.do_simulation(a, self.frame_skip) 30 | ob = self._get_obs() 31 | done = False 32 | return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) 33 | 34 | def viewer_setup(self): 35 | self.viewer.cam.trackbodyid = 0 36 | self.viewer.cam.distance = 4.0 37 | 38 | def reset_model(self): 39 | self._ball_hit_ground = False 40 | self._ball_hit_location = None 41 | 42 | qpos = self.init_qpos 43 | self.goal = np.array( 44 | [ 45 | self.np_random.uniform(low=-0.3, high=0.3), 46 | self.np_random.uniform(low=-0.3, high=0.3), 47 | ] 48 | ) 49 | 50 | qpos[-9:-7] = self.goal 51 | qvel = self.init_qvel + self.np_random.uniform( 52 | low=-0.005, high=0.005, size=self.model.nv 53 | ) 54 | qvel[7:] = 0 55 | self.set_state(qpos, qvel) 56 | return self._get_obs() 57 | 58 | def _get_obs(self): 59 | return np.concatenate( 60 | [ 61 | self.sim.data.qpos.flat[:7], 62 | 
self.sim.data.qvel.flat[:7], 63 | self.get_body_com("r_wrist_roll_link"), 64 | self.get_body_com("ball"), 65 | self.get_body_com("goal"), 66 | ] 67 | ) 68 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/mujoco/walker2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | 6 | class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4) 9 | utils.EzPickle.__init__(self) 10 | 11 | def step(self, a): 12 | posbefore = self.sim.data.qpos[0] 13 | self.do_simulation(a, self.frame_skip) 14 | posafter, height, ang = self.sim.data.qpos[0:3] 15 | alive_bonus = 1.0 16 | reward = (posafter - posbefore) / self.dt 17 | reward += alive_bonus 18 | reward -= 1e-3 * np.square(a).sum() 19 | done = not (height > 0.8 and height < 2.0 and ang > -1.0 and ang < 1.0) 20 | ob = self._get_obs() 21 | return ob, reward, done, {} 22 | 23 | def _get_obs(self): 24 | qpos = self.sim.data.qpos 25 | qvel = self.sim.data.qvel 26 | return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel() 27 | 28 | def reset_model(self): 29 | self.set_state( 30 | self.init_qpos 31 | + self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nq), 32 | self.init_qvel 33 | + self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nv), 34 | ) 35 | return self._get_obs() 36 | 37 | def viewer_setup(self): 38 | self.viewer.cam.trackbodyid = 2 39 | self.viewer.cam.distance = self.model.stat.extent * 0.5 40 | self.viewer.cam.lookat[2] = 1.15 41 | self.viewer.cam.elevation = -20 42 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/README.md: -------------------------------------------------------------------------------- 1 | # Robotics environments 2 | 3 | Details and documentation on these robotics environments are available in our [blog post](https://blog.openai.com/ingredients-for-robotics-research/), the accompanying [technical report](https://arxiv.org/abs/1802.09464), and the [Gym website](https://gym.openai.com/envs/#robotics). 4 | 5 | If you use these environments, please cite the following paper: 6 | 7 | ``` 8 | @misc{1802.09464, 9 | Author = {Matthias Plappert and Marcin Andrychowicz and Alex Ray and Bob McGrew and Bowen Baker and Glenn Powell and Jonas Schneider and Josh Tobin and Maciek Chociej and Peter Welinder and Vikash Kumar and Wojciech Zaremba}, 10 | Title = {Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research}, 11 | Year = {2018}, 12 | Eprint = {arXiv:1802.09464}, 13 | } 14 | ``` 15 | 16 | ## Fetch environments 17 | 18 | 19 | [FetchReach-v0](https://gym.openai.com/envs/FetchReach-v0/): Fetch has to move its end-effector to the desired goal position. 20 | 21 | 22 | 23 | 24 | [FetchSlide-v0](https://gym.openai.com/envs/FetchSlide-v0/): Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal. 25 | 26 | 27 | 28 | 29 | [FetchPush-v0](https://gym.openai.com/envs/FetchPush-v0/): Fetch has to move a box by pushing it until it reaches a desired goal position. 30 | 31 | 32 | 33 | 34 | [FetchPickAndPlace-v0](https://gym.openai.com/envs/FetchPickAndPlace-v0/): Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table. 
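A minimal usage sketch for the Fetch tasks listed above (not part of the upstream README): it assumes `gym` is installed together with `mujoco_py`, and that the environment id is registered under the version suffix shown (the exact suffix, e.g. `-v0` or `-v1`, depends on the installed gym release).

```python
import gym

# Goal-conditioned Fetch environments return dict observations with
# "observation", "achieved_goal" and "desired_goal" entries.
env = gym.make("FetchReach-v1")  # id suffix may differ across gym versions
obs = env.reset()
for _ in range(5):
    obs, reward, done, info = env.step(env.action_space.sample())
# The reward can be recomputed for an arbitrary goal, which is what
# hindsight-style goal relabeling relies on.
relabeled = env.compute_reward(obs["achieved_goal"], obs["desired_goal"], info)
env.close()
```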
35 | 36 | ## Shadow Dexterous Hand environments 37 | 38 | 39 | [HandReach-v0](https://gym.openai.com/envs/HandReach-v0/): ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm. 40 | 41 | 42 | 43 | 44 | [HandManipulateBlock-v0](https://gym.openai.com/envs/HandManipulateBlock-v0/): ShadowHand has to manipulate a block until it achieves a desired goal position and rotation. 45 | 46 | 47 | 48 | 49 | [HandManipulateEgg-v0](https://gym.openai.com/envs/HandManipulateEgg-v0/): ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation. 50 | 51 | 52 | 53 | 54 | [HandManipulatePen-v0](https://gym.openai.com/envs/HandManipulatePen-v0/): ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation. 55 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.robotics.fetch_env import FetchEnv 2 | from gym.envs.robotics.fetch.slide import FetchSlideEnv 3 | from gym.envs.robotics.fetch.pick_and_place import FetchPickAndPlaceEnv 4 | from gym.envs.robotics.fetch.push import FetchPushEnv 5 | from gym.envs.robotics.fetch.reach import FetchReachEnv 6 | 7 | from gym.envs.robotics.hand.reach import HandReachEnv 8 | from gym.envs.robotics.hand.manipulate import HandBlockEnv 9 | from gym.envs.robotics.hand.manipulate import HandEggEnv 10 | from gym.envs.robotics.hand.manipulate import HandPenEnv 11 | 12 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandBlockTouchSensorsEnv 13 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandEggTouchSensorsEnv 14 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandPenTouchSensorsEnv 15 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/fetch/pick_and_place.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/fetch/push.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/fetch/reach.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/fetch/slide.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/hand/manipulate_block.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 
10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/hand/manipulate_egg.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/hand/manipulate_egg_touch_sensors.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/hand/manipulate_pen.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/hand/manipulate_pen_touch_sensors.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/hand/reach.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/hand/shared_asset.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/.get: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/.get -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/estop_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/estop_link.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/gripper_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/gripper_link.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl -------------------------------------------------------------------------------- 
/envs/gym/gym/envs/robotics/assets/stls/fetch/laser_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/laser_link.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/hand/F1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/F1.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/hand/F2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/F2.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/hand/F3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/F3.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/hand/TH1_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/TH1_z.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/hand/TH2_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/TH2_z.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/hand/TH3_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/TH3_z.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/hand/forearm_electric.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/forearm_electric.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/hand/knuckle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/knuckle.stl 
-------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/hand/palm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/palm.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/stls/hand/wrist.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/wrist.stl -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/textures/block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/textures/block.png -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/assets/textures/block_hidden.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/textures/block_hidden.png -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/fetch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/fetch/__init__.py -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/fetch/pick_and_place.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join("fetch", "pick_and_place.xml") 8 | 9 | 10 | class FetchPickAndPlaceEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type="sparse"): 12 | initial_qpos = { 13 | "robot0:slide0": 0.405, 14 | "robot0:slide1": 0.48, 15 | "robot0:slide2": 0.0, 16 | "object0:joint": [1.25, 0.53, 0.4, 1.0, 0.0, 0.0, 0.0], 17 | } 18 | fetch_env.FetchEnv.__init__( 19 | self, 20 | MODEL_XML_PATH, 21 | has_object=True, 22 | block_gripper=False, 23 | n_substeps=20, 24 | gripper_extra_height=0.2, 25 | target_in_the_air=True, 26 | target_offset=0.0, 27 | obj_range=0.15, 28 | target_range=0.15, 29 | distance_threshold=0.05, 30 | initial_qpos=initial_qpos, 31 | reward_type=reward_type, 32 | ) 33 | utils.EzPickle.__init__(self, reward_type=reward_type) 34 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/fetch/push.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join("fetch", "push.xml") 8 | 9 | 10 | class FetchPushEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type="sparse"): 12 | initial_qpos = { 13 | "robot0:slide0": 0.05, 14 | "robot0:slide1": 0.48, 15 | "robot0:slide2": 0.0, 16 | "object0:joint": [1.7, 1.1, 0.41, 1.0, 0.0, 0.0, 0.0], 17 | } 18 | fetch_env.FetchEnv.__init__( 19 | self, 20 | MODEL_XML_PATH, 21 | has_object=True, 22 | block_gripper=True, 23 | n_substeps=20, 24 | gripper_extra_height=0.0, 25 | target_in_the_air=False, 26 | target_offset=0.0, 27 | obj_range=0.15, 28 | target_range=0.15, 29 | distance_threshold=0.15, 30 | initial_qpos=initial_qpos, 31 | reward_type=reward_type, 32 | ) 33 | utils.EzPickle.__init__(self, reward_type=reward_type) 34 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/fetch/reach.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join("fetch", "reach.xml") 8 | 9 | 10 | class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type="sparse"): 12 | initial_qpos = { 13 | "robot0:slide0": 0.4049, 14 | "robot0:slide1": 0.48, 15 | "robot0:slide2": 0.0, 16 | } 17 | fetch_env.FetchEnv.__init__( 18 | self, 19 | MODEL_XML_PATH, 20 | has_object=False, 21 | block_gripper=True, 22 | n_substeps=20, 23 | gripper_extra_height=0.2, 24 | target_in_the_air=True, 25 | target_offset=0.0, 26 | obj_range=0.15, 27 | target_range=0.15, 28 | distance_threshold=0.05, 29 | initial_qpos=initial_qpos, 30 | reward_type=reward_type, 31 | ) 32 | utils.EzPickle.__init__(self, reward_type=reward_type) 33 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/fetch/slide.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from gym import utils 5 | from gym.envs.robotics import fetch_env 6 | 7 | 8 | # Ensure we get the path separator correct on windows 9 | MODEL_XML_PATH = os.path.join("fetch", "slide.xml") 10 | 11 | 12 | class FetchSlideEnv(fetch_env.FetchEnv, utils.EzPickle): 13 | def __init__(self, reward_type="sparse"): 14 | initial_qpos = { 15 | "robot0:slide0": 0.05, 16 | "robot0:slide1": 0.48, 17 | "robot0:slide2": 0.0, 18 | "object0:joint": [1.7, 1.1, 0.41, 1.0, 0.0, 0.0, 0.0], 19 | } 20 | fetch_env.FetchEnv.__init__( 21 | self, 22 | MODEL_XML_PATH, 23 | has_object=True, 24 | block_gripper=True, 25 | n_substeps=20, 26 | gripper_extra_height=-0.02, 27 | target_in_the_air=False, 28 | target_offset=np.array([0.4, 0.0, 0.0]), 29 | obj_range=0.1, 30 | target_range=0.3, 31 | distance_threshold=0.05, 32 | initial_qpos=initial_qpos, 33 | reward_type=reward_type, 34 | ) 35 | utils.EzPickle.__init__(self, reward_type=reward_type) 36 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/hand/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/hand/__init__.py -------------------------------------------------------------------------------- /envs/gym/gym/envs/robotics/hand_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import numpy as np 4 | 5 | import gym 6 | from gym import error, spaces 7 | from gym.utils import seeding 8 | from gym.envs.robotics import robot_env 9 | 10 | 11 | class HandEnv(robot_env.RobotEnv): 12 | def __init__(self, model_path, n_substeps, initial_qpos, relative_control): 13 | self.relative_control = relative_control 14 | 15 | super(HandEnv, self).__init__( 16 | model_path=model_path, 17 | n_substeps=n_substeps, 18 | n_actions=20, 19 | initial_qpos=initial_qpos, 20 | ) 21 | 22 | # RobotEnv methods 23 | # ---------------------------- 24 | 25 | def _set_action(self, action): 26 | assert action.shape == (20,) 27 | 28 | ctrlrange = self.sim.model.actuator_ctrlrange 29 | actuation_range = (ctrlrange[:, 1] - ctrlrange[:, 0]) / 2.0 30 | if self.relative_control: 31 | actuation_center = np.zeros_like(action) 32 | for i in range(self.sim.data.ctrl.shape[0]): 33 | actuation_center[i] = self.sim.data.get_joint_qpos( 34 | self.sim.model.actuator_names[i].replace(":A_", ":") 35 | ) 36 | for joint_name in ["FF", "MF", "RF", "LF"]: 37 | act_idx = self.sim.model.actuator_name2id( 38 | "robot0:A_{}J1".format(joint_name) 39 | ) 40 | actuation_center[act_idx] += self.sim.data.get_joint_qpos( 41 | "robot0:{}J0".format(joint_name) 42 | ) 43 | else: 44 | actuation_center = (ctrlrange[:, 1] + ctrlrange[:, 0]) / 2.0 45 | self.sim.data.ctrl[:] = actuation_center + action * actuation_range 46 | self.sim.data.ctrl[:] = np.clip( 47 | self.sim.data.ctrl, ctrlrange[:, 0], ctrlrange[:, 1] 48 | ) 49 | 50 | def _viewer_setup(self): 51 | body_id = self.sim.model.body_name2id("robot0:palm") 52 | lookat = self.sim.data.body_xpos[body_id] 53 | for idx, value in enumerate(lookat): 54 | self.viewer.cam.lookat[idx] = value 55 | self.viewer.cam.distance = 0.5 56 | self.viewer.cam.azimuth = 55.0 57 | self.viewer.cam.elevation = -25.0 58 | 59 | def render(self, mode="human", width=500, height=500): 60 | return super(HandEnv, self).render(mode, width, height) 61 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/toy_text/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.toy_text.blackjack import BlackjackEnv 2 | from gym.envs.toy_text.frozen_lake import FrozenLakeEnv 3 | from gym.envs.toy_text.cliffwalking import CliffWalkingEnv 4 | from gym.envs.toy_text.taxi import TaxiEnv 5 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/toy_text/discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym import Env, spaces 4 | from gym.utils import seeding 5 | 6 | 7 | def categorical_sample(prob_n, np_random): 8 | """ 9 | Sample from categorical distribution 10 | Each row specifies class probabilities 11 | """ 12 | prob_n = np.asarray(prob_n) 13 | csprob_n = np.cumsum(prob_n) 14 | return (csprob_n > np_random.rand()).argmax() 15 | 16 | 17 | class DiscreteEnv(Env): 18 | 19 | """ 20 | Has the following members 21 | - nS: number of states 22 | - nA: number of actions 23 | - P: transitions (*) 24 | - isd: initial state distribution (**) 
25 | 26 | (*) dictionary of lists, where 27 | P[s][a] == [(probability, nextstate, reward, done), ...] 28 | (**) list or array of length nS 29 | 30 | 31 | """ 32 | 33 | def __init__(self, nS, nA, P, isd): 34 | self.P = P 35 | self.isd = isd 36 | self.lastaction = None # for rendering 37 | self.nS = nS 38 | self.nA = nA 39 | 40 | self.action_space = spaces.Discrete(self.nA) 41 | self.observation_space = spaces.Discrete(self.nS) 42 | 43 | self.seed() 44 | self.s = categorical_sample(self.isd, self.np_random) 45 | 46 | def seed(self, seed=None): 47 | self.np_random, seed = seeding.np_random(seed) 48 | return [seed] 49 | 50 | def reset(self): 51 | self.s = categorical_sample(self.isd, self.np_random) 52 | self.lastaction = None 53 | return int(self.s) 54 | 55 | def step(self, a): 56 | transitions = self.P[self.s][a] 57 | i = categorical_sample([t[0] for t in transitions], self.np_random) 58 | p, s, r, d = transitions[i] 59 | self.s = s 60 | self.lastaction = a 61 | return (int(s), r, d, {"prob": p}) 62 | -------------------------------------------------------------------------------- /envs/gym/gym/envs/unittest/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.unittest.cube_crash import CubeCrash 2 | from gym.envs.unittest.cube_crash import CubeCrashSparse 3 | from gym.envs.unittest.cube_crash import CubeCrashScreenBecomesBlack 4 | from gym.envs.unittest.memorize_digits import MemorizeDigits 5 | -------------------------------------------------------------------------------- /envs/gym/gym/logger.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from gym.utils import colorize 4 | 5 | DEBUG = 10 6 | INFO = 20 7 | WARN = 30 8 | ERROR = 40 9 | DISABLED = 50 10 | 11 | MIN_LEVEL = 30 12 | 13 | 14 | def set_level(level): 15 | """ 16 | Set logging threshold on current logger. 
17 | """ 18 | global MIN_LEVEL 19 | MIN_LEVEL = level 20 | 21 | 22 | def debug(msg, *args): 23 | if MIN_LEVEL <= DEBUG: 24 | print("%s: %s" % ("DEBUG", msg % args)) 25 | 26 | 27 | def info(msg, *args): 28 | if MIN_LEVEL <= INFO: 29 | print("%s: %s" % ("INFO", msg % args)) 30 | 31 | 32 | def warn(msg, *args): 33 | if MIN_LEVEL <= WARN: 34 | warnings.warn(colorize("%s: %s" % ("WARN", msg % args), "yellow")) 35 | 36 | 37 | def error(msg, *args): 38 | if MIN_LEVEL <= ERROR: 39 | print(colorize("%s: %s" % ("ERROR", msg % args), "red")) 40 | 41 | 42 | # DEPRECATED: 43 | setLevel = set_level 44 | -------------------------------------------------------------------------------- /envs/gym/gym/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.spaces.space import Space 2 | from gym.spaces.box import Box 3 | from gym.spaces.discrete import Discrete 4 | from gym.spaces.multi_discrete import MultiDiscrete 5 | from gym.spaces.multi_binary import MultiBinary 6 | from gym.spaces.tuple import Tuple 7 | from gym.spaces.dict import Dict 8 | 9 | from gym.spaces.utils import flatdim 10 | from gym.spaces.utils import flatten_space 11 | from gym.spaces.utils import flatten 12 | from gym.spaces.utils import unflatten 13 | 14 | __all__ = [ 15 | "Space", 16 | "Box", 17 | "Discrete", 18 | "MultiDiscrete", 19 | "MultiBinary", 20 | "Tuple", 21 | "Dict", 22 | "flatdim", 23 | "flatten_space", 24 | "flatten", 25 | "unflatten", 26 | ] 27 | -------------------------------------------------------------------------------- /envs/gym/gym/spaces/discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .space import Space 3 | 4 | 5 | class Discrete(Space): 6 | r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`. 7 | 8 | Example:: 9 | 10 | >>> Discrete(2) 11 | 12 | """ 13 | 14 | def __init__(self, n, seed=None): 15 | assert n >= 0 16 | self.n = n 17 | super(Discrete, self).__init__((), np.int64, seed) 18 | 19 | def sample(self): 20 | return self.np_random.randint(self.n) 21 | 22 | def contains(self, x): 23 | if isinstance(x, int): 24 | as_int = x 25 | elif isinstance(x, (np.generic, np.ndarray)) and ( 26 | x.dtype.char in np.typecodes["AllInteger"] and x.shape == () 27 | ): 28 | as_int = int(x) 29 | else: 30 | return False 31 | return as_int >= 0 and as_int < self.n 32 | 33 | def __repr__(self): 34 | return "Discrete(%d)" % self.n 35 | 36 | def __eq__(self, other): 37 | return isinstance(other, Discrete) and self.n == other.n 38 | -------------------------------------------------------------------------------- /envs/gym/gym/spaces/multi_binary.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .space import Space 3 | 4 | 5 | class MultiBinary(Space): 6 | """ 7 | An n-shape binary space. 8 | 9 | The argument to MultiBinary defines n, which could be a number or a `list` of numbers. 
10 | 11 | Example Usage: 12 | 13 | >> self.observation_space = spaces.MultiBinary(5) 14 | 15 | >> self.observation_space.sample() 16 | 17 | array([0,1,0,1,0], dtype =int8) 18 | 19 | >> self.observation_space = spaces.MultiBinary([3,2]) 20 | 21 | >> self.observation_space.sample() 22 | 23 | array([[0, 0], 24 | [0, 1], 25 | [1, 1]], dtype=int8) 26 | 27 | """ 28 | 29 | def __init__(self, n, seed=None): 30 | self.n = n 31 | if type(n) in [tuple, list, np.ndarray]: 32 | input_n = n 33 | else: 34 | input_n = (n,) 35 | super(MultiBinary, self).__init__(input_n, np.int8, seed) 36 | 37 | def sample(self): 38 | return self.np_random.randint(low=0, high=2, size=self.n, dtype=self.dtype) 39 | 40 | def contains(self, x): 41 | if isinstance(x, list) or isinstance(x, tuple): 42 | x = np.array(x) # Promote list to array for contains check 43 | if self.shape != x.shape: 44 | return False 45 | return ((x == 0) | (x == 1)).all() 46 | 47 | def to_jsonable(self, sample_n): 48 | return np.array(sample_n).tolist() 49 | 50 | def from_jsonable(self, sample_n): 51 | return [np.asarray(sample) for sample in sample_n] 52 | 53 | def __repr__(self): 54 | return "MultiBinary({})".format(self.n) 55 | 56 | def __eq__(self, other): 57 | return isinstance(other, MultiBinary) and self.n == other.n 58 | -------------------------------------------------------------------------------- /envs/gym/gym/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """A set of common utilities used within the environments. These are 2 | not intended as API functions, and will not remain stable over time. 3 | """ 4 | 5 | # These submodules should not have any import-time dependencies. 6 | # We want this since we use `utils` during our import-time sanity checks 7 | # that verify that our dependencies are actually present. 8 | from .colorize import colorize 9 | from .ezpickle import EzPickle 10 | -------------------------------------------------------------------------------- /envs/gym/gym/utils/atomic_write.py: -------------------------------------------------------------------------------- 1 | # Based on http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python 2 | 3 | import os 4 | from contextlib import contextmanager 5 | 6 | # We would ideally atomically replace any existing file with the new 7 | # version. However, on Windows there's no Python-only solution prior 8 | # to Python 3.3. (This library includes a C extension to do so: 9 | # https://pypi.python.org/pypi/pyosreplace/0.1.) 10 | # 11 | # Correspondingly, we make a best effort, but on Python < 3.3 use a 12 | # replace method which could result in the file temporarily 13 | # disappearing. 14 | import sys 15 | 16 | if sys.version_info >= (3, 3): 17 | # Python 3.3 and up have a native `replace` method 18 | from os import replace 19 | elif sys.platform.startswith("win"): 20 | 21 | def replace(src, dst): 22 | # TODO: on Windows, this will raise if the file is in use, 23 | # which is possible. We'll need to make this more robust over 24 | # time. 25 | try: 26 | os.remove(dst) 27 | except OSError: 28 | pass 29 | os.rename(src, dst) 30 | 31 | 32 | else: 33 | # POSIX rename() is always atomic 34 | from os import rename as replace 35 | 36 | 37 | @contextmanager 38 | def atomic_write(filepath, binary=False, fsync=False): 39 | """Writeable file object that atomically updates a file (using a temporary file). In some cases (namely Python < 3.3 on Windows), this could result in an existing file being temporarily unlinked. 
40 | 41 | :param filepath: the file path to be opened 42 | :param binary: whether to open the file in a binary mode instead of textual 43 | :param fsync: whether to force write the file to disk 44 | """ 45 | 46 | tmppath = filepath + "~" 47 | while os.path.isfile(tmppath): 48 | tmppath += "~" 49 | try: 50 | with open(tmppath, "wb" if binary else "w") as file: 51 | yield file 52 | if fsync: 53 | file.flush() 54 | os.fsync(file.fileno()) 55 | replace(tmppath, filepath) 56 | finally: 57 | try: 58 | os.remove(tmppath) 59 | except (IOError, OSError): 60 | pass 61 | -------------------------------------------------------------------------------- /envs/gym/gym/utils/closer.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import threading 3 | import weakref 4 | 5 | 6 | class Closer(object): 7 | """A registry that ensures your objects get closed, whether manually, 8 | upon garbage collection, or upon exit. To work properly, your 9 | objects need to cooperate and do something like the following: 10 | 11 | ``` 12 | closer = Closer() 13 | class Example(object): 14 | def __init__(self): 15 | self._id = closer.register(self) 16 | 17 | def close(self): 18 | # Probably worth making idempotent too! 19 | ... 20 | closer.unregister(self._id) 21 | 22 | def __del__(self): 23 | self.close() 24 | ``` 25 | 26 | That is, your objects should: 27 | 28 | - register() themselves and save the returned ID 29 | - unregister() themselves upon close() 30 | - include a __del__ method which close()'s the object 31 | """ 32 | 33 | def __init__(self, atexit_register=True): 34 | self.lock = threading.Lock() 35 | self.next_id = -1 36 | self.closeables = weakref.WeakValueDictionary() 37 | 38 | if atexit_register: 39 | atexit.register(self.close) 40 | 41 | def generate_next_id(self): 42 | with self.lock: 43 | self.next_id += 1 44 | return self.next_id 45 | 46 | def register(self, closeable): 47 | """Registers an object with a 'close' method. 48 | 49 | Returns: 50 | int: The registration ID of this object. It is the caller's responsibility to save this ID if early closing is desired. 51 | """ 52 | assert hasattr(closeable, "close"), "No close method for {}".format(closeable) 53 | 54 | next_id = self.generate_next_id() 55 | self.closeables[next_id] = closeable 56 | return next_id 57 | 58 | def unregister(self, id): 59 | assert id is not None 60 | if id in self.closeables: 61 | del self.closeables[id] 62 | 63 | def close(self): 64 | # Explicitly fetch all monitors first so that they can't disappear while 65 | # we iterate. cf. http://stackoverflow.com/a/12429620 66 | closeables = list(self.closeables.values()) 67 | for closeable in closeables: 68 | closeable.close() 69 | -------------------------------------------------------------------------------- /envs/gym/gym/utils/colorize.py: -------------------------------------------------------------------------------- 1 | """A set of common utilities used within the environments. These are 2 | not intended as API functions, and will not remain stable over time. 3 | """ 4 | 5 | color2num = dict( 6 | gray=30, 7 | red=31, 8 | green=32, 9 | yellow=33, 10 | blue=34, 11 | magenta=35, 12 | cyan=36, 13 | white=37, 14 | crimson=38, 15 | ) 16 | 17 | 18 | def colorize(string, color, bold=False, highlight=False): 19 | """Return string surrounded by appropriate terminal color codes to 20 | print colorized text. 
Valid colors: gray, red, green, yellow, 21 | blue, magenta, cyan, white, crimson 22 | """ 23 | 24 | attr = [] 25 | num = color2num[color] 26 | if highlight: 27 | num += 10 28 | attr.append(str(num)) 29 | if bold: 30 | attr.append("1") 31 | attrs = ";".join(attr) 32 | return "\x1b[%sm%s\x1b[0m" % (attrs, string) 33 | -------------------------------------------------------------------------------- /envs/gym/gym/utils/ezpickle.py: -------------------------------------------------------------------------------- 1 | class EzPickle(object): 2 | """Objects that are pickled and unpickled via their constructor 3 | arguments. 4 | 5 | Example usage: 6 | 7 | class Dog(Animal, EzPickle): 8 | def __init__(self, furcolor, tailkind="bushy"): 9 | Animal.__init__() 10 | EzPickle.__init__(furcolor, tailkind) 11 | ... 12 | 13 | When this object is unpickled, a new Dog will be constructed by passing the provided 14 | furcolor and tailkind into the constructor. However, philosophers are still not sure 15 | whether it is still the same dog. 16 | 17 | This is generally needed only for environments which wrap C/C++ code, such as MuJoCo 18 | and Atari. 19 | """ 20 | 21 | def __init__(self, *args, **kwargs): 22 | self._ezpickle_args = args 23 | self._ezpickle_kwargs = kwargs 24 | 25 | def __getstate__(self): 26 | return { 27 | "_ezpickle_args": self._ezpickle_args, 28 | "_ezpickle_kwargs": self._ezpickle_kwargs, 29 | } 30 | 31 | def __setstate__(self, d): 32 | out = type(self)(*d["_ezpickle_args"], **d["_ezpickle_kwargs"]) 33 | self.__dict__.update(out.__dict__) 34 | -------------------------------------------------------------------------------- /envs/gym/gym/utils/json_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def json_encode_np(obj): 5 | """ 6 | JSON can't serialize numpy types, so convert to pure python 7 | """ 8 | if isinstance(obj, np.ndarray): 9 | return list(obj) 10 | elif isinstance(obj, np.float32): 11 | return float(obj) 12 | elif isinstance(obj, np.float64): 13 | return float(obj) 14 | elif isinstance(obj, np.int8): 15 | return int(obj) 16 | elif isinstance(obj, np.int16): 17 | return int(obj) 18 | elif isinstance(obj, np.int32): 19 | return int(obj) 20 | elif isinstance(obj, np.int64): 21 | return int(obj) 22 | else: 23 | return obj 24 | -------------------------------------------------------------------------------- /envs/gym/gym/vector/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | from collections.abc import Iterable 3 | except ImportError: 4 | Iterable = (tuple, list) 5 | 6 | from gym.vector.async_vector_env import AsyncVectorEnv 7 | from gym.vector.sync_vector_env import SyncVectorEnv 8 | from gym.vector.vector_env import VectorEnv, VectorEnvWrapper 9 | 10 | __all__ = ["AsyncVectorEnv", "SyncVectorEnv", "VectorEnv", "VectorEnvWrapper", "make"] 11 | 12 | 13 | def make(id, num_envs=1, asynchronous=True, wrappers=None, **kwargs): 14 | """Create a vectorized environment from multiple copies of an environment, 15 | from its id 16 | 17 | Parameters 18 | ---------- 19 | id : str 20 | The environment ID. This must be a valid ID from the registry. 21 | 22 | num_envs : int 23 | Number of copies of the environment. 24 | 25 | asynchronous : bool (default: `True`) 26 | If `True`, wraps the environments in an `AsyncVectorEnv` (which uses 27 | `multiprocessing` to run the environments in parallel). If `False`, 28 | wraps the environments in a `SyncVectorEnv`. 
29 | 30 | wrappers : Callable or Iterable of Callables (default: `None`) 31 | If not `None`, then apply the wrappers to each internal 32 | environment during creation. 33 | 34 | Returns 35 | ------- 36 | env : `gym.vector.VectorEnv` instance 37 | The vectorized environment. 38 | 39 | Example 40 | ------- 41 | >>> import gym 42 | >>> env = gym.vector.make('CartPole-v1', 3) 43 | >>> env.reset() 44 | array([[-0.04456399, 0.04653909, 0.01326909, -0.02099827], 45 | [ 0.03073904, 0.00145001, -0.03088818, -0.03131252], 46 | [ 0.03468829, 0.01500225, 0.01230312, 0.01825218]], 47 | dtype=float32) 48 | """ 49 | from gym.envs import make as make_ 50 | 51 | def _make_env(): 52 | env = make_(id, **kwargs) 53 | if wrappers is not None: 54 | if callable(wrappers): 55 | env = wrappers(env) 56 | elif isinstance(wrappers, Iterable) and all( 57 | [callable(w) for w in wrappers] 58 | ): 59 | for wrapper in wrappers: 60 | env = wrapper(env) 61 | else: 62 | raise NotImplementedError 63 | return env 64 | 65 | env_fns = [_make_env for _ in range(num_envs)] 66 | return AsyncVectorEnv(env_fns) if asynchronous else SyncVectorEnv(env_fns) 67 | -------------------------------------------------------------------------------- /envs/gym/gym/vector/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.vector.utils.misc import CloudpickleWrapper, clear_mpi_env_vars 2 | from gym.vector.utils.numpy_utils import concatenate, create_empty_array 3 | from gym.vector.utils.shared_memory import ( 4 | create_shared_memory, 5 | read_from_shared_memory, 6 | write_to_shared_memory, 7 | ) 8 | from gym.vector.utils.spaces import _BaseGymSpaces, batch_space 9 | 10 | __all__ = [ 11 | "CloudpickleWrapper", 12 | "clear_mpi_env_vars", 13 | "concatenate", 14 | "create_empty_array", 15 | "create_shared_memory", 16 | "read_from_shared_memory", 17 | "write_to_shared_memory", 18 | "_BaseGymSpaces", 19 | "batch_space", 20 | ] 21 | -------------------------------------------------------------------------------- /envs/gym/gym/vector/utils/misc.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import os 3 | 4 | __all__ = ["CloudpickleWrapper", "clear_mpi_env_vars"] 5 | 6 | 7 | class CloudpickleWrapper(object): 8 | def __init__(self, fn): 9 | self.fn = fn 10 | 11 | def __getstate__(self): 12 | import cloudpickle 13 | 14 | return cloudpickle.dumps(self.fn) 15 | 16 | def __setstate__(self, ob): 17 | import pickle 18 | 19 | self.fn = pickle.loads(ob) 20 | 21 | def __call__(self): 22 | return self.fn() 23 | 24 | 25 | @contextlib.contextmanager 26 | def clear_mpi_env_vars(): 27 | """ 28 | `from mpi4py import MPI` will call `MPI_Init` by default. If the child 29 | process has MPI environment variables, MPI will think that the child process 30 | is an MPI process just like the parent and do bad things such as hang. 31 | 32 | This context manager is a hacky way to clear those environment variables 33 | temporarily such as when we are starting multiprocessing Processes. 
34 | """ 35 | removed_environment = {} 36 | for k, v in list(os.environ.items()): 37 | for prefix in ["OMPI_", "PMI_"]: 38 | if k.startswith(prefix): 39 | removed_environment[k] = v 40 | del os.environ[k] 41 | try: 42 | yield 43 | finally: 44 | os.environ.update(removed_environment) 45 | -------------------------------------------------------------------------------- /envs/gym/gym/version.py: -------------------------------------------------------------------------------- 1 | VERSION = "0.21.0" 2 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/README.md: -------------------------------------------------------------------------------- 1 | # Wrappers 2 | 3 | Wrappers are used to transform an environment in a modular way: 4 | 5 | ```python 6 | env = gym.make('Pong-v0') 7 | env = MyWrapper(env) 8 | ``` 9 | 10 | Note that we may later restructure any of the files in this directory, 11 | but will keep the wrappers available at the wrappers' top-level 12 | folder. So for example, you should access `MyWrapper` as follows: 13 | 14 | ```python 15 | from gym.wrappers import MyWrapper 16 | ``` 17 | 18 | ## Quick tips for writing your own wrapper 19 | 20 | - Don't forget to call `super(class_name, self).__init__(env)` if you override the wrapper's `__init__` function 21 | - You can access the inner environment with `self.unwrapped` 22 | - You can access the previous layer using `self.env` 23 | - The variables `metadata`, `action_space`, `observation_space`, `reward_range`, and `spec` are copied to `self` from the previous layer 24 | - Create a wrapped function for at least one of the following: `__init__(self, env)`, `step`, `reset`, `render`, `close`, or `seed` 25 | - Your layered function should take its input from the previous layer (`self.env`) and/or the inner layer (`self.unwrapped`) 26 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from gym import error 2 | from gym.wrappers.monitor import Monitor 3 | from gym.wrappers.time_limit import TimeLimit 4 | from gym.wrappers.filter_observation import FilterObservation 5 | from gym.wrappers.atari_preprocessing import AtariPreprocessing 6 | from gym.wrappers.time_aware_observation import TimeAwareObservation 7 | from gym.wrappers.rescale_action import RescaleAction 8 | from gym.wrappers.flatten_observation import FlattenObservation 9 | from gym.wrappers.gray_scale_observation import GrayScaleObservation 10 | from gym.wrappers.frame_stack import LazyFrames 11 | from gym.wrappers.frame_stack import FrameStack 12 | from gym.wrappers.transform_observation import TransformObservation 13 | from gym.wrappers.transform_reward import TransformReward 14 | from gym.wrappers.resize_observation import ResizeObservation 15 | from gym.wrappers.clip_action import ClipAction 16 | from gym.wrappers.record_episode_statistics import RecordEpisodeStatistics 17 | from gym.wrappers.normalize import NormalizeObservation, NormalizeReward 18 | from gym.wrappers.record_video import RecordVideo, capped_cubic_video_schedule 19 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/clip_action.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import ActionWrapper 3 | from gym.spaces import Box 4 | 5 | 6 | class ClipAction(ActionWrapper): 7 | r"""Clip 
the continuous action within the valid bound.""" 8 | 9 | def __init__(self, env): 10 | assert isinstance(env.action_space, Box) 11 | super(ClipAction, self).__init__(env) 12 | 13 | def action(self, action): 14 | return np.clip(action, self.action_space.low, self.action_space.high) 15 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/filter_observation.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from gym import spaces 3 | from gym import ObservationWrapper 4 | 5 | 6 | class FilterObservation(ObservationWrapper): 7 | """Filter dictionary observations by their keys. 8 | 9 | Args: 10 | env: The environment to wrap. 11 | filter_keys: List of keys to be included in the observations. 12 | 13 | Raises: 14 | ValueError: If `filter_keys` is neither `None` nor an 15 | iterable. 16 | ValueError: If any of the `filter_keys` are not included in 17 | the original `env`'s observation space. 18 | 19 | """ 20 | 21 | def __init__(self, env, filter_keys=None): 22 | super(FilterObservation, self).__init__(env) 23 | 24 | wrapped_observation_space = env.observation_space 25 | assert isinstance( 26 | wrapped_observation_space, spaces.Dict 27 | ), "FilterObservationWrapper is only usable with dict observations." 28 | 29 | observation_keys = wrapped_observation_space.spaces.keys() 30 | 31 | if filter_keys is None: 32 | filter_keys = tuple(observation_keys) 33 | 34 | missing_keys = set(key for key in filter_keys if key not in observation_keys) 35 | 36 | if missing_keys: 37 | raise ValueError( 38 | "All the filter_keys must be included in the " 39 | "original observation space.\n" 40 | "Filter keys: {filter_keys}\n" 41 | "Observation keys: {observation_keys}\n" 42 | "Missing keys: {missing_keys}".format( 43 | filter_keys=filter_keys, 44 | observation_keys=observation_keys, 45 | missing_keys=missing_keys, 46 | ) 47 | ) 48 | 49 | self.observation_space = type(wrapped_observation_space)( 50 | [ 51 | (name, copy.deepcopy(space)) 52 | for name, space in wrapped_observation_space.spaces.items() 53 | if name in filter_keys 54 | ] 55 | ) 56 | 57 | self._env = env 58 | self._filter_keys = tuple(filter_keys) 59 | 60 | def observation(self, observation): 61 | filter_observation = self._filter_observation(observation) 62 | return filter_observation 63 | 64 | def _filter_observation(self, observation): 65 | observation = type(observation)( 66 | [ 67 | (name, value) 68 | for name, value in observation.items() 69 | if name in self._filter_keys 70 | ] 71 | ) 72 | return observation 73 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/flatten_observation.py: -------------------------------------------------------------------------------- 1 | import gym.spaces as spaces 2 | from gym import ObservationWrapper 3 | 4 | 5 | class FlattenObservation(ObservationWrapper): 6 | r"""Observation wrapper that flattens the observation.""" 7 | 8 | def __init__(self, env): 9 | super(FlattenObservation, self).__init__(env) 10 | self.observation_space = spaces.flatten_space(env.observation_space) 11 | 12 | def observation(self, observation): 13 | return spaces.flatten(self.env.observation_space, observation) 14 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/gray_scale_observation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym.spaces import Box 3 |
from gym import ObservationWrapper 4 | 5 | 6 | class GrayScaleObservation(ObservationWrapper): 7 | r"""Convert the image observation from RGB to gray scale.""" 8 | 9 | def __init__(self, env, keep_dim=False): 10 | super(GrayScaleObservation, self).__init__(env) 11 | self.keep_dim = keep_dim 12 | 13 | assert ( 14 | len(env.observation_space.shape) == 3 15 | and env.observation_space.shape[-1] == 3 16 | ) 17 | 18 | obs_shape = self.observation_space.shape[:2] 19 | if self.keep_dim: 20 | self.observation_space = Box( 21 | low=0, high=255, shape=(obs_shape[0], obs_shape[1], 1), dtype=np.uint8 22 | ) 23 | else: 24 | self.observation_space = Box( 25 | low=0, high=255, shape=obs_shape, dtype=np.uint8 26 | ) 27 | 28 | def observation(self, observation): 29 | import cv2 30 | 31 | observation = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY) 32 | if self.keep_dim: 33 | observation = np.expand_dims(observation, -1) 34 | return observation 35 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/monitoring/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/wrappers/monitoring/__init__.py -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/order_enforcing.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | 4 | class OrderEnforcing(gym.Wrapper): 5 | def __init__(self, env): 6 | super(OrderEnforcing, self).__init__(env) 7 | self._has_reset = False 8 | 9 | def step(self, action): 10 | assert self._has_reset, "Cannot call env.step() before calling reset()" 11 | observation, reward, done, info = self.env.step(action) 12 | return observation, reward, done, info 13 | 14 | def reset(self, **kwargs): 15 | self._has_reset = True 16 | return self.env.reset(**kwargs) 17 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/record_episode_statistics.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import deque 3 | import numpy as np 4 | import gym 5 | 6 | 7 | class RecordEpisodeStatistics(gym.Wrapper): 8 | def __init__(self, env, deque_size=100): 9 | super(RecordEpisodeStatistics, self).__init__(env) 10 | self.num_envs = getattr(env, "num_envs", 1) 11 | self.t0 = time.perf_counter() 12 | self.episode_count = 0 13 | self.episode_returns = None 14 | self.episode_lengths = None 15 | self.return_queue = deque(maxlen=deque_size) 16 | self.length_queue = deque(maxlen=deque_size) 17 | self.is_vector_env = getattr(env, "is_vector_env", False) 18 | 19 | def reset(self, **kwargs): 20 | observations = super(RecordEpisodeStatistics, self).reset(**kwargs) 21 | self.episode_returns = np.zeros(self.num_envs, dtype=np.float32) 22 | self.episode_lengths = np.zeros(self.num_envs, dtype=np.int32) 23 | return observations 24 | 25 | def step(self, action): 26 | observations, rewards, dones, infos = super(RecordEpisodeStatistics, self).step( 27 | action 28 | ) 29 | self.episode_returns += rewards 30 | self.episode_lengths += 1 31 | if not self.is_vector_env: 32 | infos = [infos] 33 | dones = [dones] 34 | for i in range(len(dones)): 35 | if dones[i]: 36 | infos[i] = infos[i].copy() 37 | episode_return = self.episode_returns[i] 38 | episode_length = self.episode_lengths[i] 39 | episode_info = { 40 | "r": 
episode_return, 41 | "l": episode_length, 42 | "t": round(time.perf_counter() - self.t0, 6), 43 | } 44 | infos[i]["episode"] = episode_info 45 | self.return_queue.append(episode_return) 46 | self.length_queue.append(episode_length) 47 | self.episode_count += 1 48 | self.episode_returns[i] = 0 49 | self.episode_lengths[i] = 0 50 | return ( 51 | observations, 52 | rewards, 53 | dones if self.is_vector_env else dones[0], 54 | infos if self.is_vector_env else infos[0], 55 | ) 56 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/rescale_action.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gym 3 | from gym import spaces 4 | 5 | 6 | class RescaleAction(gym.ActionWrapper): 7 | r"""Rescales the continuous action space of the environment to a range [min_action, max_action]. 8 | 9 | Example:: 10 | 11 | >>> RescaleAction(env, min_action, max_action).action_space == Box(min_action, max_action) 12 | True 13 | 14 | """ 15 | 16 | def __init__(self, env, min_action, max_action): 17 | assert isinstance( 18 | env.action_space, spaces.Box 19 | ), "expected Box action space, got {}".format(type(env.action_space)) 20 | assert np.less_equal(min_action, max_action).all(), (min_action, max_action) 21 | 22 | super(RescaleAction, self).__init__(env) 23 | self.min_action = ( 24 | np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + min_action 25 | ) 26 | self.max_action = ( 27 | np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + max_action 28 | ) 29 | self.action_space = spaces.Box( 30 | low=min_action, 31 | high=max_action, 32 | shape=env.action_space.shape, 33 | dtype=env.action_space.dtype, 34 | ) 35 | 36 | def action(self, action): 37 | assert np.all(np.greater_equal(action, self.min_action)), ( 38 | action, 39 | self.min_action, 40 | ) 41 | assert np.all(np.less_equal(action, self.max_action)), (action, self.max_action) 42 | low = self.env.action_space.low 43 | high = self.env.action_space.high 44 | action = low + (high - low) * ( 45 | (action - self.min_action) / (self.max_action - self.min_action) 46 | ) 47 | action = np.clip(action, low, high) 48 | return action 49 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/resize_observation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym.spaces import Box 3 | from gym import ObservationWrapper 4 | 5 | 6 | class ResizeObservation(ObservationWrapper): 7 | r"""Downsample the image observation to a square image.""" 8 | 9 | def __init__(self, env, shape): 10 | super(ResizeObservation, self).__init__(env) 11 | if isinstance(shape, int): 12 | shape = (shape, shape) 13 | assert all(x > 0 for x in shape), shape 14 | 15 | self.shape = tuple(shape) 16 | 17 | obs_shape = self.shape + self.observation_space.shape[2:] 18 | self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8) 19 | 20 | def observation(self, observation): 21 | import cv2 22 | 23 | observation = cv2.resize( 24 | observation, self.shape[::-1], interpolation=cv2.INTER_AREA 25 | ) 26 | if observation.ndim == 2: 27 | observation = np.expand_dims(observation, -1) 28 | return observation 29 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/time_aware_observation.py: -------------------------------------------------------------------------------- 1 | import numpy 
as np 2 | from gym.spaces import Box 3 | from gym import ObservationWrapper 4 | 5 | 6 | class TimeAwareObservation(ObservationWrapper): 7 | r"""Augment the observation with current time step in the trajectory. 8 | 9 | .. note:: 10 | Currently it only works with one-dimensional observation space. It doesn't 11 | support pixel observation space yet. 12 | 13 | """ 14 | 15 | def __init__(self, env): 16 | super(TimeAwareObservation, self).__init__(env) 17 | assert isinstance(env.observation_space, Box) 18 | assert env.observation_space.dtype == np.float32 19 | low = np.append(self.observation_space.low, 0.0) 20 | high = np.append(self.observation_space.high, np.inf) 21 | self.observation_space = Box(low, high, dtype=np.float32) 22 | 23 | def observation(self, observation): 24 | return np.append(observation, self.t) 25 | 26 | def step(self, action): 27 | self.t += 1 28 | return super(TimeAwareObservation, self).step(action) 29 | 30 | def reset(self, **kwargs): 31 | self.t = 0 32 | return super(TimeAwareObservation, self).reset(**kwargs) 33 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/time_limit.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | 4 | class TimeLimit(gym.Wrapper): 5 | def __init__(self, env, max_episode_steps=None): 6 | super(TimeLimit, self).__init__(env) 7 | if max_episode_steps is None and self.env.spec is not None: 8 | max_episode_steps = env.spec.max_episode_steps 9 | if self.env.spec is not None: 10 | self.env.spec.max_episode_steps = max_episode_steps 11 | self._max_episode_steps = max_episode_steps 12 | self._elapsed_steps = None 13 | 14 | def step(self, action): 15 | assert ( 16 | self._elapsed_steps is not None 17 | ), "Cannot call env.step() before calling reset()" 18 | observation, reward, done, info = self.env.step(action) 19 | self._elapsed_steps += 1 20 | if self._elapsed_steps >= self._max_episode_steps: 21 | info["TimeLimit.truncated"] = not done 22 | done = True 23 | return observation, reward, done, info 24 | 25 | def reset(self, **kwargs): 26 | self._elapsed_steps = 0 27 | return self.env.reset(**kwargs) 28 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/transform_observation.py: -------------------------------------------------------------------------------- 1 | from gym import ObservationWrapper 2 | 3 | 4 | class TransformObservation(ObservationWrapper): 5 | r"""Transform the observation via an arbitrary function. 6 | 7 | Example:: 8 | 9 | >>> import gym 10 | >>> env = gym.make('CartPole-v1') 11 | >>> env = TransformObservation(env, lambda obs: obs + 0.1*np.random.randn(*obs.shape)) 12 | >>> env.reset() 13 | array([-0.08319338, 0.04635121, -0.07394746, 0.20877492]) 14 | 15 | Args: 16 | env (Env): environment 17 | f (callable): a function that transforms the observation 18 | 19 | """ 20 | 21 | def __init__(self, env, f): 22 | super(TransformObservation, self).__init__(env) 23 | assert callable(f) 24 | self.f = f 25 | 26 | def observation(self, observation): 27 | return self.f(observation) 28 | -------------------------------------------------------------------------------- /envs/gym/gym/wrappers/transform_reward.py: -------------------------------------------------------------------------------- 1 | from gym import RewardWrapper 2 | 3 | 4 | class TransformReward(RewardWrapper): 5 | r"""Transform the reward via an arbitrary function. 
6 | 7 | Example:: 8 | 9 | >>> import gym 10 | >>> env = gym.make('CartPole-v1') 11 | >>> env = TransformReward(env, lambda r: 0.01*r) 12 | >>> env.reset() 13 | >>> observation, reward, done, info = env.step(env.action_space.sample()) 14 | >>> reward 15 | 0.01 16 | 17 | Args: 18 | env (Env): environment 19 | f (callable): a function that transforms the reward 20 | 21 | """ 22 | 23 | def __init__(self, env, f): 24 | super(TransformReward, self).__init__(env) 25 | assert callable(f) 26 | self.f = f 27 | 28 | def reward(self, reward): 29 | return self.f(reward) 30 | -------------------------------------------------------------------------------- /envs/gym/py.Dockerfile: -------------------------------------------------------------------------------- 1 | # A Dockerfile that sets up a full Gym install with test dependencies 2 | ARG PYTHON_VERSION 3 | FROM python:$PYTHON_VERSION 4 | RUN apt-get -y update && apt-get install -y unzip libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg cmake swig 5 | 6 | # Download mujoco 7 | RUN mkdir /root/.mujoco && \ 8 | cd /root/.mujoco && \ 9 | curl -O https://www.roboti.us/download/mjpro150_linux.zip && \ 10 | unzip mjpro150_linux.zip && \ 11 | echo DUMMY_KEY > /root/.mujoco/mjkey.txt 12 | 13 | ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mjpro150/bin 14 | 15 | COPY . /usr/local/gym/ 16 | WORKDIR /usr/local/gym/ 17 | 18 | RUN pip install .[nomujoco,accept-rom-license] && pip install -r test_requirements.txt 19 | 20 | ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"] 21 | -------------------------------------------------------------------------------- /envs/gym/requirements.txt: -------------------------------------------------------------------------------- 1 | ale-py~=0.7 2 | opencv-python>=3. 3 | box2d-py==2.3.5 4 | mujoco_py>=1.50, <2.0 5 | scipy>=1.4.1 6 | numpy>=1.18.0 7 | pyglet>=1.4.0 8 | cloudpickle>=1.2.0 9 | lz4>=3.1.0 10 | -------------------------------------------------------------------------------- /envs/gym/setup.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import sys 3 | import itertools 4 | 5 | from setuptools import find_packages, setup 6 | 7 | # Don't import gym module here, since deps may not be installed 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "gym")) 9 | from version import VERSION 10 | 11 | # Environment-specific dependencies. 12 | extras = { 13 | "atari": ["ale-py~=0.7.1"], 14 | "accept-rom-license": ["autorom[accept-rom-license]~=0.4.2"], 15 | "box2d": ["box2d-py==2.3.5", "pyglet>=1.4.0"], 16 | "classic_control": ["pyglet>=1.4.0"], 17 | "mujoco": ["mujoco_py>=1.50, <2.0"], 18 | "robotics": ["mujoco_py>=1.50, <2.0"], 19 | "toy_text": ["scipy>=1.4.1"], 20 | "other": ["lz4>=3.1.0", "opencv-python>=3."], 21 | } 22 | 23 | # Meta dependency groups. 
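# The "nomujoco" group assembled below bundles every extra except the
# MuJoCo-based ones and the ROM-license helper (e.g. `pip install gym[nomujoco]`
# pulls in atari, box2d, classic_control, toy_text and "other"), while "all"
# excludes only "accept-rom-license" (e.g. `pip install gym[all]`).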
24 | nomujoco_blacklist = set(["mujoco", "robotics", "accept-rom-license"]) 25 | nomujoco_groups = set(extras.keys()) - nomujoco_blacklist 26 | 27 | extras["nomujoco"] = list( 28 | itertools.chain.from_iterable(map(lambda group: extras[group], nomujoco_groups)) 29 | ) 30 | 31 | 32 | all_blacklist = set(["accept-rom-license"]) 33 | all_groups = set(extras.keys()) - all_blacklist 34 | 35 | extras["all"] = list( 36 | itertools.chain.from_iterable(map(lambda group: extras[group], all_groups)) 37 | ) 38 | 39 | setup( 40 | name="gym", 41 | version=VERSION, 42 | description="Gym: A universal API for reinforcement learning environments.", 43 | url="https://github.com/openai/gym", 44 | author="OpenAI", 45 | author_email="jkterry@umd.edu", 46 | license="", 47 | packages=[package for package in find_packages() if package.startswith("gym")], 48 | zip_safe=False, 49 | install_requires=[ 50 | "numpy>=1.18.0", 51 | "cloudpickle>=1.2.0", 52 | "importlib_metadata>=4.8.1; python_version < '3.8'", 53 | ], 54 | extras_require=extras, 55 | package_data={ 56 | "gym": [ 57 | "envs/mujoco/assets/*.xml", 58 | "envs/classic_control/assets/*.png", 59 | "envs/robotics/assets/LICENSE.md", 60 | "envs/robotics/assets/fetch/*.xml", 61 | "envs/robotics/assets/hand/*.xml", 62 | "envs/robotics/assets/stls/fetch/*.stl", 63 | "envs/robotics/assets/stls/hand/*.stl", 64 | "envs/robotics/assets/textures/*.png", 65 | ] 66 | }, 67 | tests_require=["pytest", "mock"], 68 | python_requires=">=3.6", 69 | classifiers=[ 70 | "Programming Language :: Python :: 3", 71 | "Programming Language :: Python :: 3.6", 72 | "Programming Language :: Python :: 3.7", 73 | "Programming Language :: Python :: 3.8", 74 | "Programming Language :: Python :: 3.9", 75 | ], 76 | ) 77 | -------------------------------------------------------------------------------- /envs/gym/test_requirements.txt: -------------------------------------------------------------------------------- 1 | lz4~=3.1 2 | pytest~=6.2 3 | pytest-forked~=1.3 4 | -------------------------------------------------------------------------------- /envs/gym/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/__init__.py -------------------------------------------------------------------------------- /envs/gym/tests/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/envs/__init__.py -------------------------------------------------------------------------------- /envs/gym/tests/envs/robotics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/envs/robotics/__init__.py -------------------------------------------------------------------------------- /envs/gym/tests/envs/robotics/hand/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/envs/robotics/hand/__init__.py -------------------------------------------------------------------------------- /envs/gym/tests/envs/robotics/hand/test_manipulate.py: -------------------------------------------------------------------------------- 1 | import 
pickle 2 | 3 | import pytest 4 | 5 | from gym import envs 6 | from tests.envs.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE 7 | 8 | 9 | ENVIRONMENT_IDS = ( 10 | "HandManipulateEgg-v0", 11 | "HandManipulatePen-v0", 12 | "HandManipulateBlock-v0", 13 | ) 14 | 15 | 16 | @pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE) 17 | @pytest.mark.parametrize("environment_id", ENVIRONMENT_IDS) 18 | def test_serialize_deserialize(environment_id): 19 | env1 = envs.make(environment_id, target_position="fixed") 20 | env1.reset() 21 | env2 = pickle.loads(pickle.dumps(env1)) 22 | 23 | assert env1.target_position == env2.target_position, ( 24 | env1.target_position, 25 | env2.target_position, 26 | ) 27 | -------------------------------------------------------------------------------- /envs/gym/tests/envs/robotics/hand/test_manipulate_touch_sensors.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pytest 4 | 5 | from gym import envs 6 | from tests.envs.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE 7 | 8 | 9 | ENVIRONMENT_IDS = ( 10 | "HandManipulateEggTouchSensors-v1", 11 | "HandManipulatePenTouchSensors-v0", 12 | "HandManipulateBlockTouchSensors-v0", 13 | ) 14 | 15 | 16 | @pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE) 17 | @pytest.mark.parametrize("environment_id", ENVIRONMENT_IDS) 18 | def test_serialize_deserialize(environment_id): 19 | env1 = envs.make(environment_id, target_position="fixed") 20 | env1.reset() 21 | env2 = pickle.loads(pickle.dumps(env1)) 22 | 23 | assert env1.target_position == env2.target_position, ( 24 | env1.target_position, 25 | env2.target_position, 26 | ) 27 | -------------------------------------------------------------------------------- /envs/gym/tests/envs/robotics/hand/test_reach.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pytest 4 | 5 | from gym import envs 6 | from tests.envs.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE 7 | 8 | 9 | @pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE) 10 | def test_serialize_deserialize(): 11 | env1 = envs.make("HandReach-v0", distance_threshold=1e-6) 12 | env1.reset() 13 | env2 = pickle.loads(pickle.dumps(env1)) 14 | 15 | assert env1.distance_threshold == env2.distance_threshold, ( 16 | env1.distance_threshold, 17 | env2.distance_threshold, 18 | ) 19 | -------------------------------------------------------------------------------- /envs/gym/tests/envs/rollout.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /envs/gym/tests/envs/spec_list.py: -------------------------------------------------------------------------------- 1 | from gym import envs, logger 2 | import os 3 | 4 | 5 | SKIP_MUJOCO_WARNING_MESSAGE = ( 6 | "Cannot run mujoco test (either license key not found or mujoco not" 7 | "installed properly)." 
8 | ) 9 | 10 | 11 | skip_mujoco = not (os.environ.get("MUJOCO_KEY")) 12 | if not skip_mujoco: 13 | try: 14 | import mujoco_py 15 | except ImportError: 16 | skip_mujoco = True 17 | 18 | 19 | def should_skip_env_spec_for_tests(spec): 20 | # We skip tests for envs that require dependencies or are otherwise 21 | # troublesome to run frequently 22 | ep = spec.entry_point 23 | # Skip mujoco tests for pull request CI 24 | if skip_mujoco and ( 25 | ep.startswith("gym.envs.mujoco") or ep.startswith("gym.envs.robotics:") 26 | ): 27 | return True 28 | try: 29 | import gym.envs.atari 30 | except ImportError: 31 | if ep.startswith("gym.envs.atari"): 32 | return True 33 | try: 34 | import Box2D 35 | except ImportError: 36 | if ep.startswith("gym.envs.box2d"): 37 | return True 38 | 39 | if ( 40 | "GoEnv" in ep 41 | or "HexEnv" in ep 42 | or ( 43 | ep.startswith("gym.envs.atari") 44 | and not spec.id.startswith("Pong") 45 | and not spec.id.startswith("Seaquest") 46 | ) 47 | ): 48 | logger.warn("Skipping tests for env {}".format(ep)) 49 | return True 50 | return False 51 | 52 | 53 | spec_list = [ 54 | spec 55 | for spec in sorted(envs.registry.all(), key=lambda x: x.id) 56 | if spec.entry_point is not None and not should_skip_env_spec_for_tests(spec) 57 | ] 58 | -------------------------------------------------------------------------------- /envs/gym/tests/envs/test_frozenlake_dfs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from gym.envs.toy_text.frozen_lake import generate_random_map 5 | 6 | 7 | # Test that FrozenLake map generation creates valid maps of various sizes. 8 | def test_frozenlake_dfs_map_generation(): 9 | def frozenlake_dfs_path_exists(res): 10 | frontier, discovered = [], set() 11 | frontier.append((0, 0)) 12 | while frontier: 13 | r, c = frontier.pop() 14 | if not (r, c) in discovered: 15 | discovered.add((r, c)) 16 | directions = [(1, 0), (0, 1), (-1, 0), (0, -1)] 17 | for x, y in directions: 18 | r_new = r + x 19 | c_new = c + y 20 | if r_new < 0 or r_new >= size or c_new < 0 or c_new >= size: 21 | continue 22 | if res[r_new][c_new] == "G": 23 | return True 24 | if res[r_new][c_new] not in "#H": 25 | frontier.append((r_new, c_new)) 26 | return False 27 | 28 | map_sizes = [5, 10, 200] 29 | for size in map_sizes: 30 | new_frozenlake = generate_random_map(size) 31 | assert len(new_frozenlake) == size 32 | assert len(new_frozenlake[0]) == size 33 | assert frozenlake_dfs_path_exists(new_frozenlake) 34 | -------------------------------------------------------------------------------- /envs/gym/tests/envs/test_lunar_lander.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | try: 4 | import Box2D 5 | from gym.envs.box2d.lunar_lander import ( 6 | LunarLander, 7 | LunarLanderContinuous, 8 | demo_heuristic_lander, 9 | ) 10 | except ImportError: 11 | Box2D = None 12 | 13 | 14 | @pytest.mark.skipif(Box2D is None, reason="Box2D not installed") 15 | def test_lunar_lander(): 16 | _test_lander(LunarLander(), seed=0) 17 | 18 | 19 | @pytest.mark.skipif(Box2D is None, reason="Box2D not installed") 20 | def test_lunar_lander_continuous(): 21 | _test_lander(LunarLanderContinuous(), seed=0) 22 | 23 | 24 | @pytest.mark.skipif(Box2D is None, reason="Box2D not installed") 25 | def _test_lander(env, seed=None, render=False): 26 | total_reward = demo_heuristic_lander(env, seed=seed, render=render) 27 | assert total_reward > 100 28 | 
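# A minimal usage sketch, for illustration only: it reuses the guarded imports
# above and runs solely when this file is executed directly with Box2D
# installed, so pytest collection is unaffected.
if __name__ == "__main__" and Box2D is not None:
    # Render one heuristic-controlled landing and print the episode return.
    print(demo_heuristic_lander(LunarLander(), seed=0, render=True))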
-------------------------------------------------------------------------------- /envs/gym/tests/envs/test_mujoco_v2_to_v3_conversion.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from gym import envs 4 | from tests.envs.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE 5 | 6 | 7 | def verify_environments_match( 8 | old_environment_id, new_environment_id, seed=1, num_actions=1000 9 | ): 10 | old_environment = envs.make(old_environment_id) 11 | new_environment = envs.make(new_environment_id) 12 | 13 | old_environment.seed(seed) 14 | new_environment.seed(seed) 15 | 16 | old_reset_observation = old_environment.reset() 17 | new_reset_observation = new_environment.reset() 18 | 19 | np.testing.assert_allclose(old_reset_observation, new_reset_observation) 20 | 21 | for i in range(num_actions): 22 | action = old_environment.action_space.sample() 23 | old_observation, old_reward, old_done, old_info = old_environment.step(action) 24 | new_observation, new_reward, new_done, new_info = new_environment.step(action) 25 | 26 | eps = 1e-6 27 | np.testing.assert_allclose(old_observation, new_observation, atol=eps) 28 | np.testing.assert_allclose(old_reward, new_reward, atol=eps) 29 | np.testing.assert_allclose(old_done, new_done, atol=eps) 30 | 31 | for key in old_info: 32 | np.testing.assert_allclose(old_info[key], new_info[key], atol=eps) 33 | 34 | 35 | @unittest.skipIf(skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE) 36 | class Mujocov2Tov3ConversionTest(unittest.TestCase): 37 | def test_environments_match(self): 38 | test_cases = ( 39 | {"old_id": "Swimmer-v2", "new_id": "Swimmer-v3"}, 40 | {"old_id": "Hopper-v2", "new_id": "Hopper-v3"}, 41 | {"old_id": "Walker2d-v2", "new_id": "Walker2d-v3"}, 42 | {"old_id": "HalfCheetah-v2", "new_id": "HalfCheetah-v3"}, 43 | {"old_id": "Ant-v2", "new_id": "Ant-v3"}, 44 | {"old_id": "Humanoid-v2", "new_id": "Humanoid-v3"}, 45 | ) 46 | 47 | for test_case in test_cases: 48 | verify_environments_match(test_case["old_id"], test_case["new_id"]) 49 | 50 | # Raises KeyError because the new envs have extra info 51 | with self.assertRaises(KeyError): 52 | verify_environments_match("Swimmer-v3", "Swimmer-v2") 53 | 54 | # Raises KeyError because the new envs have extra info 55 | with self.assertRaises(KeyError): 56 | verify_environments_match("Humanoid-v3", "Humanoid-v2") 57 | 58 | # Raises KeyError because the new envs have extra info 59 | with self.assertRaises(KeyError): 60 | verify_environments_match("Swimmer-v3", "Swimmer-v2") 61 | 62 | 63 | if __name__ == "__main__": 64 | unittest.main() 65 | -------------------------------------------------------------------------------- /envs/gym/tests/envs/test_registration.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import gym 3 | from gym import error, envs 4 | from gym.envs import registration 5 | from gym.envs.classic_control import cartpole 6 | 7 | 8 | class ArgumentEnv(gym.Env): 9 | def __init__(self, arg1, arg2, arg3): 10 | self.arg1 = arg1 11 | self.arg2 = arg2 12 | self.arg3 = arg3 13 | 14 | 15 | gym.register( 16 | id="test.ArgumentEnv-v0", 17 | entry_point="tests.envs.test_registration:ArgumentEnv", 18 | kwargs={ 19 | "arg1": "arg1", 20 | "arg2": "arg2", 21 | }, 22 | ) 23 | 24 | 25 | def test_make(): 26 | env = envs.make("CartPole-v0") 27 | assert env.spec.id == "CartPole-v0" 28 | assert isinstance(env.unwrapped, cartpole.CartPoleEnv) 29 | 30 | 31 | def 
test_make_with_kwargs(): 32 | env = envs.make("test.ArgumentEnv-v0", arg2="override_arg2", arg3="override_arg3") 33 | assert env.spec.id == "test.ArgumentEnv-v0" 34 | assert isinstance(env.unwrapped, ArgumentEnv) 35 | assert env.arg1 == "arg1" 36 | assert env.arg2 == "override_arg2" 37 | assert env.arg3 == "override_arg3" 38 | 39 | 40 | def test_make_deprecated(): 41 | try: 42 | envs.make("Humanoid-v0") 43 | except error.Error: 44 | pass 45 | else: 46 | assert False 47 | 48 | 49 | def test_spec(): 50 | spec = envs.spec("CartPole-v0") 51 | assert spec.id == "CartPole-v0" 52 | 53 | 54 | def test_spec_with_kwargs(): 55 | map_name_value = "8x8" 56 | env = gym.make("FrozenLake-v1", map_name=map_name_value) 57 | assert env.spec._kwargs["map_name"] == map_name_value 58 | 59 | 60 | def test_missing_lookup(): 61 | registry = registration.EnvRegistry() 62 | registry.register(id="Test-v0", entry_point=None) 63 | registry.register(id="Test-v15", entry_point=None) 64 | registry.register(id="Test-v9", entry_point=None) 65 | registry.register(id="Other-v100", entry_point=None) 66 | try: 67 | registry.spec("Test-v1") # must match an env name but not the version above 68 | except error.DeprecatedEnv: 69 | pass 70 | else: 71 | assert False 72 | 73 | try: 74 | registry.spec("Unknown-v1") 75 | except error.UnregisteredEnv: 76 | pass 77 | else: 78 | assert False 79 | 80 | 81 | def test_malformed_lookup(): 82 | registry = registration.EnvRegistry() 83 | try: 84 | registry.spec(u"“Breakout-v0”") 85 | except error.Error as e: 86 | assert "malformed environment ID" in "{}".format( 87 | e 88 | ), "Unexpected message: {}".format(e) 89 | else: 90 | assert False 91 | -------------------------------------------------------------------------------- /envs/gym/tests/spaces/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/spaces/__init__.py -------------------------------------------------------------------------------- /envs/gym/tests/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/utils/__init__.py -------------------------------------------------------------------------------- /envs/gym/tests/utils/test_atexit.py: -------------------------------------------------------------------------------- 1 | from gym.utils.closer import Closer 2 | 3 | 4 | class Closeable(object): 5 | close_called = False 6 | 7 | def close(self): 8 | self.close_called = True 9 | 10 | 11 | def test_register_unregister(): 12 | registry = Closer(atexit_register=False) 13 | c1 = Closeable() 14 | c2 = Closeable() 15 | 16 | assert not c1.close_called 17 | assert not c2.close_called 18 | registry.register(c1) 19 | id2 = registry.register(c2) 20 | 21 | registry.unregister(id2) 22 | registry.close() 23 | assert c1.close_called 24 | assert not c2.close_called 25 | -------------------------------------------------------------------------------- /envs/gym/tests/utils/test_env_checker.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import pytest 4 | from gym.spaces import Box, Dict, Discrete 5 | 6 | from gym.utils.env_checker import check_env 7 | 8 | 9 | class ActionDictTestEnv(gym.Env): 10 | action_space = Dict({"position": Discrete(1), "velocity": Discrete(1)}) 11 | 
observation_space = Box(low=-1.0, high=2.0, shape=(3,), dtype=np.float32) 12 | 13 | def step(self, action): 14 | observation = np.array([1.0, 1.5, 0.5]) 15 | reward = 1 16 | done = True 17 | return observation, reward, done 18 | 19 | def reset(self): 20 | return np.array([1.0, 1.5, 0.5]) 21 | 22 | def render(self, mode="human"): 23 | pass 24 | 25 | 26 | def test_check_env_dict_action(): 27 | # Environment.step() only returns 3 values: obs, reward, done. Not info! 28 | test_env = ActionDictTestEnv() 29 | 30 | with pytest.raises(AssertionError) as errorinfo: 31 | check_env(env=test_env, warn=True) 32 | assert ( 33 | str(errorinfo.value) 34 | == "The `step()` method must return four values: obs, reward, done, info" 35 | ) 36 | -------------------------------------------------------------------------------- /envs/gym/tests/utils/test_seeding.py: -------------------------------------------------------------------------------- 1 | from gym import error 2 | from gym.utils import seeding 3 | 4 | 5 | def test_invalid_seeds(): 6 | for seed in [-1, "test"]: 7 | try: 8 | seeding.np_random(seed) 9 | except error.Error: 10 | pass 11 | else: 12 | assert False, "Invalid seed {} passed validation".format(seed) 13 | 14 | 15 | def test_valid_seeds(): 16 | for seed in [0, 1]: 17 | random, seed1 = seeding.np_random(seed) 18 | assert seed == seed1 19 | -------------------------------------------------------------------------------- /envs/gym/tests/vector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/vector/__init__.py -------------------------------------------------------------------------------- /envs/gym/tests/vector/test_vector_env.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from gym.spaces import Tuple 5 | from tests.vector.utils import CustomSpace, make_env 6 | 7 | from gym.vector.async_vector_env import AsyncVectorEnv 8 | from gym.vector.sync_vector_env import SyncVectorEnv 9 | from gym.vector.vector_env import VectorEnv 10 | 11 | 12 | @pytest.mark.parametrize("shared_memory", [True, False]) 13 | def test_vector_env_equal(shared_memory): 14 | env_fns = [make_env("CubeCrash-v0", i) for i in range(4)] 15 | num_steps = 100 16 | try: 17 | async_env = AsyncVectorEnv(env_fns, shared_memory=shared_memory) 18 | sync_env = SyncVectorEnv(env_fns) 19 | 20 | async_env.seed(0) 21 | sync_env.seed(0) 22 | 23 | assert async_env.num_envs == sync_env.num_envs 24 | assert async_env.observation_space == sync_env.observation_space 25 | assert async_env.single_observation_space == sync_env.single_observation_space 26 | assert async_env.action_space == sync_env.action_space 27 | assert async_env.single_action_space == sync_env.single_action_space 28 | 29 | async_observations = async_env.reset() 30 | sync_observations = sync_env.reset() 31 | assert np.all(async_observations == sync_observations) 32 | 33 | for _ in range(num_steps): 34 | actions = async_env.action_space.sample() 35 | assert actions in sync_env.action_space 36 | 37 | async_observations, async_rewards, async_dones, _ = async_env.step(actions) 38 | sync_observations, sync_rewards, sync_dones, _ = sync_env.step(actions) 39 | 40 | assert np.all(async_observations == sync_observations) 41 | assert np.all(async_rewards == sync_rewards) 42 | assert np.all(async_dones == sync_dones) 43 | 44 | finally: 45 | async_env.close() 46 
| sync_env.close() 47 | 48 | 49 | def test_custom_space_vector_env(): 50 | env = VectorEnv(4, CustomSpace(), CustomSpace()) 51 | 52 | assert isinstance(env.single_observation_space, CustomSpace) 53 | assert isinstance(env.observation_space, Tuple) 54 | 55 | assert isinstance(env.single_action_space, CustomSpace) 56 | assert isinstance(env.action_space, Tuple) 57 | -------------------------------------------------------------------------------- /envs/gym/tests/vector/test_vector_env_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.vector import make 3 | from gym.vector import VectorEnvWrapper 4 | 5 | 6 | class DummyWrapper(VectorEnvWrapper): 7 | def __init__(self, env): 8 | self.env = env 9 | self.counter = 0 10 | 11 | def reset_async(self): 12 | super().reset_async() 13 | self.counter += 1 14 | 15 | 16 | def test_vector_env_wrapper_inheritance(): 17 | env = make("FrozenLake-v1", asynchronous=False) 18 | wrapped = DummyWrapper(env) 19 | wrapped.reset() 20 | assert wrapped.counter == 1 21 | -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/wrappers/__init__.py -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/monitoring/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/wrappers/monitoring/__init__.py -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/monitoring/helpers.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import shutil 3 | import tempfile 4 | 5 | 6 | @contextlib.contextmanager 7 | def tempdir(): 8 | temp = tempfile.mkdtemp() 9 | yield temp 10 | shutil.rmtree(temp) 11 | -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/monitoring/test_video_recorder.py: -------------------------------------------------------------------------------- 1 | import gc 2 | import os 3 | import time 4 | 5 | import gym 6 | from gym.wrappers.monitoring.video_recorder import VideoRecorder 7 | 8 | 9 | class BrokenRecordableEnv(object): 10 | metadata = {"render.modes": [None, "rgb_array"]} 11 | 12 | def render(self, mode=None): 13 | pass 14 | 15 | 16 | class UnrecordableEnv(object): 17 | metadata = {"render.modes": [None]} 18 | 19 | def render(self, mode=None): 20 | pass 21 | 22 | 23 | def test_record_simple(): 24 | env = gym.make("CartPole-v1") 25 | rec = VideoRecorder(env) 26 | env.reset() 27 | rec.capture_frame() 28 | proc = rec.encoder.proc 29 | 30 | assert proc.poll() is None # subprocess is running 31 | 32 | rec.close() 33 | 34 | assert proc.poll() is not None # subprocess is terminated 35 | assert not rec.empty 36 | assert not rec.broken 37 | assert os.path.exists(rec.path) 38 | f = open(rec.path) 39 | assert os.fstat(f.fileno()).st_size > 100 40 | 41 | 42 | def test_autoclose(): 43 | def record(): 44 | env = gym.make("CartPole-v1") 45 | rec = VideoRecorder(env) 46 | env.reset() 47 | rec.capture_frame() 48 | 49 | rec_path = rec.path 50 | proc = rec.encoder.proc 51 | 52 | assert proc.poll() is None # 
subprocess is running 53 | 54 | # The function ends without an explicit `rec.close()` call 55 | # The Python interpreter will implicitly do `del rec` on garbage cleaning 56 | return rec_path, proc 57 | 58 | rec_path, proc = record() 59 | 60 | gc.collect() # do explicit garbage collection for test 61 | time.sleep(5) # wait for subprocess exiting 62 | 63 | assert proc.poll() is not None # subprocess is terminated 64 | assert os.path.exists(rec_path) 65 | f = open(rec_path) 66 | assert os.fstat(f.fileno()).st_size > 100 67 | 68 | 69 | def test_no_frames(): 70 | env = BrokenRecordableEnv() 71 | rec = VideoRecorder(env) 72 | rec.close() 73 | assert rec.empty 74 | assert rec.functional 75 | assert not os.path.exists(rec.path) 76 | 77 | 78 | def test_record_unrecordable_method(): 79 | env = UnrecordableEnv() 80 | rec = VideoRecorder(env) 81 | assert not rec.enabled 82 | rec.close() 83 | 84 | 85 | def test_record_breaking_render_method(): 86 | env = BrokenRecordableEnv() 87 | rec = VideoRecorder(env) 88 | rec.capture_frame() 89 | rec.close() 90 | assert rec.empty 91 | assert rec.broken 92 | assert not os.path.exists(rec.path) 93 | 94 | 95 | def test_text_envs(): 96 | env = gym.make("FrozenLake-v1") 97 | video = VideoRecorder(env) 98 | try: 99 | env.reset() 100 | video.capture_frame() 101 | video.close() 102 | finally: 103 | os.remove(video.path) 104 | -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/test_clip_action.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | from gym.wrappers import ClipAction 5 | 6 | 7 | def test_clip_action(): 8 | # mountaincar: action-based rewards 9 | make_env = lambda: gym.make("MountainCarContinuous-v0") 10 | env = make_env() 11 | wrapped_env = ClipAction(make_env()) 12 | 13 | seed = 0 14 | env.seed(seed) 15 | wrapped_env.seed(seed) 16 | 17 | env.reset() 18 | wrapped_env.reset() 19 | 20 | actions = [[0.4], [1.2], [-0.3], [0.0], [-2.5]] 21 | for action in actions: 22 | obs1, r1, d1, _ = env.step( 23 | np.clip(action, env.action_space.low, env.action_space.high) 24 | ) 25 | obs2, r2, d2, _ = wrapped_env.step(action) 26 | assert np.allclose(r1, r2) 27 | assert np.allclose(obs1, obs2) 28 | assert d1 == d2 29 | -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/test_flatten_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import FlattenObservation 7 | from gym import spaces 8 | 9 | 10 | @pytest.mark.parametrize("env_id", ["Blackjack-v1"]) 11 | def test_flatten_observation(env_id): 12 | env = gym.make(env_id) 13 | wrapped_env = FlattenObservation(env) 14 | 15 | obs = env.reset() 16 | wrapped_obs = wrapped_env.reset() 17 | 18 | space = spaces.Tuple((spaces.Discrete(32), spaces.Discrete(11), spaces.Discrete(2))) 19 | wrapped_space = spaces.Box(0, 1, [32 + 11 + 2], dtype=np.int64) 20 | 21 | assert space.contains(obs) 22 | assert wrapped_space.contains(wrapped_obs) 23 | -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/test_frame_stack.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | pytest.importorskip("gym.envs.atari") 4 | 5 | import numpy as np 6 | import gym 7 | from gym.wrappers import FrameStack 8 | 9 | try: 10 | import lz4 11 | except 
ImportError: 12 | lz4 = None 13 | 14 | 15 | @pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1", "Pong-v0"]) 16 | @pytest.mark.parametrize("num_stack", [2, 3, 4]) 17 | @pytest.mark.parametrize( 18 | "lz4_compress", 19 | [ 20 | pytest.param( 21 | True, 22 | marks=pytest.mark.skipif( 23 | lz4 is None, reason="Need lz4 to run tests with compression" 24 | ), 25 | ), 26 | False, 27 | ], 28 | ) 29 | def test_frame_stack(env_id, num_stack, lz4_compress): 30 | env = gym.make(env_id) 31 | env.seed(0) 32 | shape = env.observation_space.shape 33 | env = FrameStack(env, num_stack, lz4_compress) 34 | assert env.observation_space.shape == (num_stack,) + shape 35 | assert env.observation_space.dtype == env.env.observation_space.dtype 36 | 37 | dup = gym.make(env_id) 38 | dup.seed(0) 39 | 40 | obs = env.reset() 41 | dup_obs = dup.reset() 42 | assert np.allclose(obs[-1], dup_obs) 43 | 44 | for _ in range(num_stack ** 2): 45 | action = env.action_space.sample() 46 | dup_obs, _, _, _ = dup.step(action) 47 | obs, _, _, _ = env.step(action) 48 | assert np.allclose(obs[-1], dup_obs) 49 | 50 | assert len(obs) == num_stack 51 | -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/test_gray_scale_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import GrayScaleObservation 7 | from gym.wrappers import AtariPreprocessing 8 | 9 | pytest.importorskip("gym.envs.atari") 10 | pytest.importorskip("cv2") 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "env_id", ["PongNoFrameskip-v0", "SpaceInvadersNoFrameskip-v0"] 15 | ) 16 | @pytest.mark.parametrize("keep_dim", [True, False]) 17 | def test_gray_scale_observation(env_id, keep_dim): 18 | gray_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=True) 19 | rgb_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=False) 20 | wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim) 21 | assert rgb_env.observation_space.shape[-1] == 3 22 | 23 | seed = 0 24 | gray_env.seed(seed) 25 | wrapped_env.seed(seed) 26 | 27 | gray_obs = gray_env.reset() 28 | wrapped_obs = wrapped_env.reset() 29 | 30 | if keep_dim: 31 | assert wrapped_env.observation_space.shape[-1] == 1 32 | assert len(wrapped_obs.shape) == 3 33 | wrapped_obs = wrapped_obs.squeeze(-1) 34 | else: 35 | assert len(wrapped_env.observation_space.shape) == 2 36 | assert len(wrapped_obs.shape) == 2 37 | 38 | # ALE gray scale is slightly different, but no more than by one shade 39 | assert np.allclose(gray_obs.astype("int32"), wrapped_obs.astype("int32"), atol=1) 40 | -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/test_record_episode_statistics.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import gym 4 | from gym.wrappers import RecordEpisodeStatistics 5 | 6 | 7 | @pytest.mark.parametrize("env_id", ["CartPole-v0", "Pendulum-v1"]) 8 | @pytest.mark.parametrize("deque_size", [2, 5]) 9 | def test_record_episode_statistics(env_id, deque_size): 10 | env = gym.make(env_id) 11 | env = RecordEpisodeStatistics(env, deque_size) 12 | 13 | for n in range(5): 14 | env.reset() 15 | assert env.episode_returns[0] == 0.0 16 | assert env.episode_lengths[0] == 0 17 | for t in range(env.spec.max_episode_steps): 18 | _, _, done, info = env.step(env.action_space.sample()) 19 | if done: 20 | 
assert "episode" in info 21 | assert all([item in info["episode"] for item in ["r", "l", "t"]]) 22 | break 23 | assert len(env.return_queue) == deque_size 24 | assert len(env.length_queue) == deque_size 25 | 26 | 27 | @pytest.mark.parametrize("num_envs", [1, 4]) 28 | def test_record_episode_statistics_with_vectorenv(num_envs): 29 | envs = gym.vector.make("CartPole-v0", num_envs=num_envs, asynchronous=False) 30 | envs = RecordEpisodeStatistics(envs) 31 | envs.reset() 32 | for _ in range(envs.env.envs[0].spec.max_episode_steps + 1): 33 | _, _, dones, infos = envs.step(envs.action_space.sample()) 34 | for idx, info in enumerate(infos): 35 | if dones[idx]: 36 | assert "episode" in info 37 | assert all([item in info["episode"] for item in ["r", "l", "t"]]) 38 | break 39 | -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/test_record_video.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import os 4 | import shutil 5 | import gym 6 | from gym.wrappers import ( 7 | RecordEpisodeStatistics, 8 | RecordVideo, 9 | capped_cubic_video_schedule, 10 | ) 11 | 12 | 13 | def test_record_video_using_default_trigger(): 14 | 15 | env = gym.make("CartPole-v1") 16 | env = gym.wrappers.RecordVideo(env, "videos") 17 | env.reset() 18 | for _ in range(199): 19 | action = env.action_space.sample() 20 | _, _, done, _ = env.step(action) 21 | if done: 22 | env.reset() 23 | env.close() 24 | assert os.path.isdir("videos") 25 | mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")] 26 | assert len(mp4_files) == sum( 27 | [capped_cubic_video_schedule(i) for i in range(env.episode_id + 1)] 28 | ) 29 | shutil.rmtree("videos") 30 | 31 | 32 | def test_record_video_step_trigger(): 33 | env = gym.make("CartPole-v1") 34 | env._max_episode_steps = 20 35 | env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0) 36 | env.reset() 37 | for _ in range(199): 38 | action = env.action_space.sample() 39 | _, _, done, _ = env.step(action) 40 | if done: 41 | env.reset() 42 | env.close() 43 | assert os.path.isdir("videos") 44 | mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")] 45 | assert len(mp4_files) == 2 46 | shutil.rmtree("videos") 47 | 48 | 49 | def make_env(gym_id, seed): 50 | def thunk(): 51 | env = gym.make(gym_id) 52 | env._max_episode_steps = 20 53 | if seed == 1: 54 | env = gym.wrappers.RecordVideo( 55 | env, "videos", step_trigger=lambda x: x % 100 == 0 56 | ) 57 | return env 58 | 59 | return thunk 60 | 61 | 62 | def test_record_video_within_vector(): 63 | envs = gym.vector.SyncVectorEnv([make_env("CartPole-v1", 1 + i) for i in range(2)]) 64 | envs = gym.wrappers.RecordEpisodeStatistics(envs) 65 | envs.reset() 66 | for i in range(199): 67 | _, _, _, infos = envs.step(envs.action_space.sample()) 68 | for info in infos: 69 | if "episode" in info.keys(): 70 | print(f"episode_reward={info['episode']['r']}") 71 | break 72 | assert os.path.isdir("videos") 73 | mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")] 74 | assert len(mp4_files) == 2 75 | shutil.rmtree("videos") 76 | -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/test_rescale_action.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import RescaleAction 7 | 8 | 9 | def 
test_rescale_action(): 10 | env = gym.make("CartPole-v1") 11 | with pytest.raises(AssertionError): 12 | env = RescaleAction(env, -1, 1) 13 | del env 14 | 15 | env = gym.make("Pendulum-v1") 16 | wrapped_env = RescaleAction(gym.make("Pendulum-v1"), -1, 1) 17 | 18 | seed = 0 19 | env.seed(seed) 20 | wrapped_env.seed(seed) 21 | 22 | obs = env.reset() 23 | wrapped_obs = wrapped_env.reset() 24 | assert np.allclose(obs, wrapped_obs) 25 | 26 | obs, reward, _, _ = env.step([1.5]) 27 | with pytest.raises(AssertionError): 28 | wrapped_env.step([1.5]) 29 | wrapped_obs, wrapped_reward, _, _ = wrapped_env.step([0.75]) 30 | 31 | assert np.allclose(obs, wrapped_obs) 32 | assert np.allclose(reward, wrapped_reward) 33 | -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/test_resize_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import gym 4 | from gym.wrappers import ResizeObservation 5 | 6 | pytest.importorskip("gym.envs.atari") 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "env_id", ["PongNoFrameskip-v0", "SpaceInvadersNoFrameskip-v0"] 11 | ) 12 | @pytest.mark.parametrize("shape", [16, 32, (8, 5), [10, 7]]) 13 | def test_resize_observation(env_id, shape): 14 | env = gym.make(env_id) 15 | env = ResizeObservation(env, shape) 16 | 17 | assert env.observation_space.shape[-1] == 3 18 | obs = env.reset() 19 | if isinstance(shape, int): 20 | assert env.observation_space.shape[:2] == (shape, shape) 21 | assert obs.shape == (shape, shape, 3) 22 | else: 23 | assert env.observation_space.shape[:2] == tuple(shape) 24 | assert obs.shape == tuple(shape) + (3,) 25 | -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/test_time_aware_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import gym 4 | from gym.wrappers import TimeAwareObservation 5 | 6 | 7 | @pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"]) 8 | def test_time_aware_observation(env_id): 9 | env = gym.make(env_id) 10 | wrapped_env = TimeAwareObservation(env) 11 | 12 | assert wrapped_env.observation_space.shape[0] == env.observation_space.shape[0] + 1 13 | 14 | obs = env.reset() 15 | wrapped_obs = wrapped_env.reset() 16 | assert wrapped_env.t == 0.0 17 | assert wrapped_obs[-1] == 0.0 18 | assert wrapped_obs.shape[0] == obs.shape[0] + 1 19 | 20 | wrapped_obs, _, _, _ = wrapped_env.step(env.action_space.sample()) 21 | assert wrapped_env.t == 1.0 22 | assert wrapped_obs[-1] == 1.0 23 | assert wrapped_obs.shape[0] == obs.shape[0] + 1 24 | 25 | wrapped_obs, _, _, _ = wrapped_env.step(env.action_space.sample()) 26 | assert wrapped_env.t == 2.0 27 | assert wrapped_obs[-1] == 2.0 28 | assert wrapped_obs.shape[0] == obs.shape[0] + 1 29 | 30 | wrapped_obs = wrapped_env.reset() 31 | assert wrapped_env.t == 0.0 32 | assert wrapped_obs[-1] == 0.0 33 | assert wrapped_obs.shape[0] == obs.shape[0] + 1 34 | -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/test_transform_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import TransformObservation 7 | 8 | 9 | @pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"]) 10 | def test_transform_observation(env_id): 11 | affine_transform = lambda x: 3 * x + 2 12 | env = 
gym.make(env_id) 13 | wrapped_env = TransformObservation( 14 | gym.make(env_id), lambda obs: affine_transform(obs) 15 | ) 16 | 17 | env.seed(0) 18 | wrapped_env.seed(0) 19 | 20 | obs = env.reset() 21 | wrapped_obs = wrapped_env.reset() 22 | assert np.allclose(wrapped_obs, affine_transform(obs)) 23 | 24 | action = env.action_space.sample() 25 | obs, reward, done, _ = env.step(action) 26 | wrapped_obs, wrapped_reward, wrapped_done, _ = wrapped_env.step(action) 27 | assert np.allclose(wrapped_obs, affine_transform(obs)) 28 | assert np.allclose(wrapped_reward, reward) 29 | assert wrapped_done == done 30 | -------------------------------------------------------------------------------- /envs/gym/tests/wrappers/test_transform_reward.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import TransformReward 7 | 8 | 9 | @pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"]) 10 | def test_transform_reward(env_id): 11 | # use case #1: scale 12 | scales = [0.1, 200] 13 | for scale in scales: 14 | env = gym.make(env_id) 15 | wrapped_env = TransformReward(gym.make(env_id), lambda r: scale * r) 16 | action = env.action_space.sample() 17 | 18 | env.seed(0) 19 | env.reset() 20 | wrapped_env.seed(0) 21 | wrapped_env.reset() 22 | 23 | _, reward, _, _ = env.step(action) 24 | _, wrapped_reward, _, _ = wrapped_env.step(action) 25 | 26 | assert wrapped_reward == scale * reward 27 | del env, wrapped_env 28 | 29 | # use case #2: clip 30 | min_r = -0.0005 31 | max_r = 0.0002 32 | env = gym.make(env_id) 33 | wrapped_env = TransformReward(gym.make(env_id), lambda r: np.clip(r, min_r, max_r)) 34 | action = env.action_space.sample() 35 | 36 | env.seed(0) 37 | env.reset() 38 | wrapped_env.seed(0) 39 | wrapped_env.reset() 40 | 41 | _, reward, _, _ = env.step(action) 42 | _, wrapped_reward, _, _ = wrapped_env.step(action) 43 | 44 | assert abs(wrapped_reward) < abs(reward) 45 | assert wrapped_reward == -0.0005 or wrapped_reward == 0.0002 46 | del env, wrapped_env 47 | 48 | # use case #3: sign 49 | env = gym.make(env_id) 50 | wrapped_env = TransformReward(gym.make(env_id), lambda r: np.sign(r)) 51 | 52 | env.seed(0) 53 | env.reset() 54 | wrapped_env.seed(0) 55 | wrapped_env.reset() 56 | 57 | for _ in range(1000): 58 | action = env.action_space.sample() 59 | _, wrapped_reward, done, _ = wrapped_env.step(action) 60 | assert wrapped_reward in [-1.0, 0.0, 1.0] 61 | if done: 62 | break 63 | del env, wrapped_env 64 | -------------------------------------------------------------------------------- /envs/mujoco-maze/.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: CI 4 | 5 | # Controls when the action will run. 
Triggers the workflow on push or pull request 6 | # events but only for the main branch 7 | on: 8 | push: 9 | branches: [ main ] 10 | pull_request: 11 | branches: [ main ] 12 | 13 | jobs: 14 | ci: 15 | strategy: 16 | matrix: 17 | python-version: [3.6, 3.7, 3.8] 18 | runs-on: ubuntu-18.04 19 | env: 20 | LD_LIBRARY_PATH: /home/runner/.mujoco/mujoco200/bin 21 | steps: 22 | - uses: actions/checkout@v2 23 | - uses: actions/setup-python@v2 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install Poetry 27 | run: pip install poetry 28 | - name: Install Mujoco Dependencies 29 | run: sudo apt update && sudo apt install libosmesa6-dev libglfw3 libglew-dev patchelf libgl1-mesa-glx -y 30 | - name: Install Mujoco 31 | run: | 32 | mkdir ~/.mujoco 33 | curl -sO https://www.roboti.us/download/mujoco200_linux.zip 34 | unzip mujoco200_linux.zip 35 | mv mujoco200_linux ~/.mujoco/mujoco200 36 | echo "${{ secrets.MUJOCO_KEY }}" > ~/.mujoco/mjkey.txt 37 | - name: Run tests 38 | run: | 39 | poetry install 40 | poetry run python -m pip freeze 41 | poetry run test 42 | -------------------------------------------------------------------------------- /envs/mujoco-maze/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | **/__pycache__/ 3 | **/.pytest_cache/ 4 | **/dist/ 5 | **/build/ 6 | **/*.egg-info/ 7 | **/.mypy_cache/ 8 | -------------------------------------------------------------------------------- /envs/mujoco-maze/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include mujoco_maze/assets/* 2 | -------------------------------------------------------------------------------- /envs/mujoco-maze/mujoco_maze/agent_model.py: -------------------------------------------------------------------------------- 1 | """Common APIs for defining mujoco robot. 
2 | """ 3 | from abc import ABC, abstractmethod 4 | from typing import Optional 5 | 6 | import numpy as np 7 | from gym.envs.mujoco.mujoco_env import MujocoEnv 8 | from gym.utils import EzPickle 9 | 10 | 11 | class AgentModel(ABC, MujocoEnv, EzPickle): 12 | FILE: str 13 | MANUAL_COLLISION: bool 14 | ORI_IND: Optional[int] = None 15 | RADIUS: Optional[float] = None 16 | OBJBALL_TYPE: Optional[str] = None 17 | 18 | def __init__(self, file_path: str, frame_skip: int) -> None: 19 | MujocoEnv.__init__(self, file_path, frame_skip) 20 | EzPickle.__init__(self) 21 | 22 | def close(self): 23 | if self.viewer is not None and hasattr(self.viewer, "window"): 24 | import glfw 25 | 26 | glfw.destroy_window(self.viewer.window) 27 | super().close() 28 | 29 | @abstractmethod 30 | def _get_obs(self) -> np.ndarray: 31 | """Returns the observation from the model.""" 32 | pass 33 | 34 | def get_xy(self) -> np.ndarray: 35 | """Returns the coordinate of the agent.""" 36 | pass 37 | 38 | def set_xy(self, xy: np.ndarray) -> None: 39 | """Set the coordinate of the agent.""" 40 | pass 41 | -------------------------------------------------------------------------------- /envs/mujoco-maze/mujoco_maze/assets/point.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 35 | -------------------------------------------------------------------------------- /envs/mujoco-maze/mujoco_maze/assets/reacher.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 35 | -------------------------------------------------------------------------------- /envs/mujoco-maze/mujoco_maze/assets/swimmer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 40 | -------------------------------------------------------------------------------- /envs/mujoco-maze/mujoco_maze/point.py: -------------------------------------------------------------------------------- 1 | """ 2 | A ball-like robot as an explorer in the maze. 3 | Based on `models`_ and `rllab`_. 4 | 5 | .. _models: https://github.com/tensorflow/models/tree/master/research/efficient-hrl 6 | .. 
_rllab: https://github.com/rll/rllab 7 | """ 8 | 9 | from typing import Optional, Tuple 10 | 11 | import gym 12 | import numpy as np 13 | 14 | from mujoco_maze.agent_model import AgentModel 15 | 16 | 17 | class PointEnv(AgentModel): 18 | FILE: str = "point.xml" 19 | ORI_IND: int = 2 20 | MANUAL_COLLISION: bool = True 21 | RADIUS: float = 0.4 22 | OBJBALL_TYPE: str = "hinge" 23 | 24 | VELOCITY_LIMITS: float = 10.0 25 | 26 | def __init__(self, file_path: Optional[str] = None) -> None: 27 | super().__init__(file_path, 1) 28 | high = np.inf * np.ones(6, dtype=np.float32) 29 | high[3:] = self.VELOCITY_LIMITS * 1.2 30 | high[self.ORI_IND] = np.pi 31 | low = -high 32 | self.observation_space = gym.spaces.Box(low, high) 33 | 34 | def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]: 35 | qpos = self.sim.data.qpos.copy() 36 | qpos[2] += action[1] 37 | # Clip orientation 38 | if qpos[2] < -np.pi: 39 | qpos[2] += np.pi * 2 40 | elif np.pi < qpos[2]: 41 | qpos[2] -= np.pi * 2 42 | ori = qpos[2] 43 | # Compute increment in each direction 44 | qpos[0] += np.cos(ori) * action[0] 45 | qpos[1] += np.sin(ori) * action[0] 46 | qvel = np.clip(self.sim.data.qvel, -self.VELOCITY_LIMITS, self.VELOCITY_LIMITS) 47 | self.set_state(qpos, qvel) 48 | for _ in range(0, self.frame_skip): 49 | self.sim.step() 50 | next_obs = self._get_obs() 51 | return next_obs, 0.0, False, {} 52 | 53 | def _get_obs(self): 54 | return np.concatenate( 55 | [ 56 | self.sim.data.qpos.flat[:3], # Only point-relevant coords. 57 | self.sim.data.qvel.flat[:3], 58 | ] 59 | ) 60 | 61 | def reset_model(self): 62 | qpos = self.init_qpos # + self.np_random.uniform( 63 | # size=self.sim.model.nq, low=-0.1, high=0.1 64 | # ) 65 | qvel = self.init_qvel # + self.np_random.randn(self.sim.model.nv) * 0.1 66 | 67 | # Set everything other than point to original position and 0 velocity. 68 | qpos[3:] = self.init_qpos[3:] 69 | qvel[3:] = 0.0 70 | self.set_state(qpos, qvel) 71 | return self._get_obs() 72 | 73 | def get_xy(self): 74 | return self.sim.data.qpos[:2].copy() 75 | 76 | def set_xy(self, xy: np.ndarray) -> None: 77 | qpos = self.sim.data.qpos.copy() 78 | qpos[:2] = xy 79 | self.set_state(qpos, self.sim.data.qvel) 80 | 81 | def get_ori(self): 82 | return self.sim.data.qpos[self.ORI_IND] 83 | -------------------------------------------------------------------------------- /envs/mujoco-maze/mujoco_maze/reacher.py: -------------------------------------------------------------------------------- 1 | """ 2 | Based on the reacher in `dm_control`_. 3 | 4 | .. 
_dm_control: https://github.com/deepmind/dm_control 5 | """ 6 | 7 | from typing import Tuple 8 | 9 | import numpy as np 10 | 11 | from mujoco_maze.agent_model import AgentModel 12 | from mujoco_maze.ant import ForwardRewardFn, forward_reward_vnorm 13 | 14 | 15 | class ReacherEnv(AgentModel): 16 | FILE: str = "reacher.xml" 17 | MANUAL_COLLISION: bool = False 18 | 19 | def __init__( 20 | self, 21 | file_path: str = None, 22 | forward_reward_weight: float = 1.0, 23 | ctrl_cost_weight: float = 1e-4, 24 | forward_reward_fn: ForwardRewardFn = forward_reward_vnorm, 25 | ) -> None: 26 | self._forward_reward_weight = forward_reward_weight 27 | self._ctrl_cost_weight = ctrl_cost_weight 28 | self._forward_reward_fn = forward_reward_fn 29 | super().__init__(file_path, 4) 30 | 31 | def _forward_reward(self, xy_pos_before: np.ndarray) -> Tuple[float, np.ndarray]: 32 | xy_pos_after = self.sim.data.qpos[:2].copy() 33 | xy_velocity = (xy_pos_after - xy_pos_before) / self.dt 34 | return self._forward_reward_fn(xy_velocity) 35 | 36 | def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]: 37 | xy_pos_before = self.sim.data.qpos[:2].copy() 38 | self.do_simulation(action, self.frame_skip) 39 | 40 | forward_reward = self._forward_reward(xy_pos_before) 41 | ctrl_cost = self._ctrl_cost_weight * np.sum(np.square(action)) 42 | return ( 43 | self._get_obs(), 44 | self._forward_reward_weight * forward_reward - ctrl_cost, 45 | False, 46 | dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost), 47 | ) 48 | 49 | def _get_obs(self) -> np.ndarray: 50 | position = self.sim.data.qpos.flat.copy() 51 | velocity = self.sim.data.qvel.flat.copy() 52 | observation = np.concatenate([position, velocity]).ravel() 53 | return observation 54 | 55 | def reset_model(self) -> np.ndarray: 56 | qpos = self.init_qpos + self.np_random.uniform( 57 | low=-0.1, 58 | high=0.1, 59 | size=self.model.nq, 60 | ) 61 | qvel = self.init_qvel + self.np_random.uniform( 62 | low=-0.1, 63 | high=0.1, 64 | size=self.model.nv, 65 | ) 66 | 67 | self.set_state(qpos, qvel) 68 | return self._get_obs() 69 | 70 | def set_xy(self, xy: np.ndarray) -> None: 71 | qpos = self.sim.data.qpos.copy() 72 | qpos[:2] = xy 73 | self.set_state(qpos, self.sim.data.qvel) 74 | 75 | def get_xy(self) -> np.ndarray: 76 | return np.copy(self.sim.data.qpos[:2]) 77 | -------------------------------------------------------------------------------- /envs/mujoco-maze/mujoco_maze/swimmer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Swimmer robot as an explorer in the maze. 3 | Based on `gym`_ (swimmer-v3). 4 | 5 | .. 
_gym: https://github.com/openai/gym 6 | """ 7 | 8 | from typing import Tuple 9 | 10 | import numpy as np 11 | 12 | from mujoco_maze.agent_model import AgentModel 13 | from mujoco_maze.ant import ForwardRewardFn, forward_reward_vnorm 14 | 15 | 16 | class SwimmerEnv(AgentModel): 17 | FILE: str = "swimmer.xml" 18 | MANUAL_COLLISION: bool = False 19 | 20 | def __init__( 21 | self, 22 | file_path: str = None, 23 | forward_reward_weight: float = 1.0, 24 | ctrl_cost_weight: float = 1e-4, 25 | forward_reward_fn: ForwardRewardFn = forward_reward_vnorm, 26 | ) -> None: 27 | self._forward_reward_weight = forward_reward_weight 28 | self._ctrl_cost_weight = ctrl_cost_weight 29 | self._forward_reward_fn = forward_reward_fn 30 | super().__init__(file_path, 4) 31 | 32 | def _forward_reward(self, xy_pos_before: np.ndarray) -> Tuple[float, np.ndarray]: 33 | xy_pos_after = self.sim.data.qpos[:2].copy() 34 | xy_velocity = (xy_pos_after - xy_pos_before) / self.dt 35 | return self._forward_reward_fn(xy_velocity) 36 | 37 | def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]: 38 | xy_pos_before = self.sim.data.qpos[:2].copy() 39 | self.do_simulation(action, self.frame_skip) 40 | forward_reward = self._forward_reward(xy_pos_before) 41 | ctrl_cost = self._ctrl_cost_weight * np.sum(np.square(action)) 42 | return ( 43 | self._get_obs(), 44 | self._forward_reward_weight * forward_reward - ctrl_cost, 45 | False, 46 | dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost), 47 | ) 48 | 49 | def _get_obs(self) -> np.ndarray: 50 | position = self.sim.data.qpos.flat.copy() 51 | velocity = self.sim.data.qvel.flat.copy() 52 | observation = np.concatenate([position, velocity]).ravel() 53 | return observation 54 | 55 | def reset_model(self) -> np.ndarray: 56 | qpos = self.init_qpos + self.np_random.uniform( 57 | low=-0.1, 58 | high=0.1, 59 | size=self.model.nq, 60 | ) 61 | qvel = self.init_qvel + self.np_random.uniform( 62 | low=-0.1, 63 | high=0.1, 64 | size=self.model.nv, 65 | ) 66 | 67 | self.set_state(qpos, qvel) 68 | return self._get_obs() 69 | 70 | def set_xy(self, xy: np.ndarray) -> None: 71 | qpos = self.sim.data.qpos.copy() 72 | qpos[:2] = xy 73 | self.set_state(qpos, self.sim.data.qvel) 74 | 75 | def get_xy(self) -> np.ndarray: 76 | return np.copy(self.sim.data.qpos[:2]) 77 | -------------------------------------------------------------------------------- /envs/mujoco-maze/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "mujoco-maze" 3 | version = "0.2.0" 4 | description = "Simple maze environments using mujoco-py" 5 | license = "Apache-2.0" 6 | authors = ["Yuji Kanagawa "] 7 | readme = "README.md" # Markdown files are supported 8 | 9 | repository = "https://github.com/kngwyu/mujoco-maze" 10 | homepage = "https://github.com/kngwyu/mujoco-maze" 11 | 12 | [tool.poetry.dependencies] 13 | python = ">=3.6" # Compatible python versions must be declared here 14 | gym = ">=0.16" 15 | mujoco-py = ">=1.5" 16 | 17 | [tool.poetry.dev-dependencies] 18 | pytest = "^6.2" 19 | 20 | [tool.poetry.scripts] 21 | test = "pytest:main" 22 | 23 | [tool.black] 24 | line-length = 88 25 | target-version = ['py36'] 26 | include = '\.pyi?$' 27 | exclude = ''' 28 | 29 | ( 30 | /( 31 | \.eggs 32 | | \.git 33 | | \.mypy_cache 34 | | _build 35 | | buck-out 36 | | build 37 | | dist 38 | )/ 39 | ) 40 | ''' -------------------------------------------------------------------------------- /envs/mujoco-maze/screenshots/AntFall.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/AntFall.png -------------------------------------------------------------------------------- /envs/mujoco-maze/screenshots/AntPush.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/AntPush.png -------------------------------------------------------------------------------- /envs/mujoco-maze/screenshots/Point4Rooms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/Point4Rooms.png -------------------------------------------------------------------------------- /envs/mujoco-maze/screenshots/PointBilliard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/PointBilliard.png -------------------------------------------------------------------------------- /envs/mujoco-maze/screenshots/PointCorridor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/PointCorridor.png -------------------------------------------------------------------------------- /envs/mujoco-maze/screenshots/PointUMaze.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/PointUMaze.png -------------------------------------------------------------------------------- /envs/mujoco-maze/screenshots/SwimmerSquareRoom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/SwimmerSquareRoom.png -------------------------------------------------------------------------------- /envs/mujoco-maze/screenshots/WebVis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/WebVis.png -------------------------------------------------------------------------------- /envs/mujoco-maze/setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | ignore = W391, W503, F821, E203, E231 4 | 5 | [mypy] 6 | python_version = 3.6 7 | ignore_missing_imports = True 8 | warn_unused_configs = True 9 | 10 | [isort] 11 | multi_line_output=3 12 | include_trailing_comma=True 13 | force_grid_wrap=0 14 | use_parentheses=True 15 | line_length=88 16 | lines_between_types=0 17 | combine_as_imports=True 18 | known_first_party=rainy, tests 19 | -------------------------------------------------------------------------------- /envs/mujoco-maze/setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import re 4 | 5 | from setuptools import find_packages, 
setup 6 | 7 | NAME = "mujoco-maze" 8 | AUTHOR = "Yuji Kanagawa" 9 | EMAIL = "yuji.kngw.80s.revive@gmail.com" 10 | URL = "https://github.com/kngwyu/mujoco-maze" 11 | REQUIRES_PYTHON = ">=3.6.0" 12 | DESCRIPTION = "Simple maze environments using mujoco-py" 13 | 14 | here = os.path.abspath(os.path.dirname(__file__)) 15 | 16 | with io.open(os.path.join(here, "mujoco_maze/__init__.py"), "rt", encoding="utf8") as f: 17 | VERSION = re.search(r"__version__ = \"(.*?)\"", f.read()).group(1) 18 | 19 | try: 20 | with io.open(os.path.join(here, "README.md"), encoding="utf-8") as f: 21 | LONG_DESCRIPTION = "\n" + f.read() 22 | except FileNotFoundError: 23 | LONG_DESCRIPTION = DESCRIPTION 24 | 25 | 26 | REQUIRED = ["gym>=0.16.0", "mujoco-py>=1.5.0"] 27 | 28 | 29 | setup( 30 | name=NAME, 31 | version=VERSION, 32 | url=URL, 33 | project_urls={ 34 | "Code": URL, 35 | "Issue tracker": URL + "/issues", 36 | }, 37 | author=AUTHOR, 38 | author_email=EMAIL, 39 | description=DESCRIPTION, 40 | long_description=LONG_DESCRIPTION, 41 | long_description_content_type="text/markdown", 42 | packages=find_packages(), 43 | python_requires=REQUIRES_PYTHON, 44 | install_requires=REQUIRED, 45 | license="Apache2", 46 | classifiers=[ 47 | "License :: OSI Approved :: Apache Software License", 48 | "Development Status :: 3 - Alpha", 49 | "Intended Audience :: Science/Research", 50 | "Natural Language :: English", 51 | "Operating System :: OS Independent", 52 | "Programming Language :: Python", 53 | "Programming Language :: Python :: 3", 54 | "Programming Language :: Python :: 3.6", 55 | "Programming Language :: Python :: 3.7", 56 | "Programming Language :: Python :: 3.8", 57 | "Programming Language :: Python :: Implementation :: CPython", 58 | ], 59 | ) 60 | -------------------------------------------------------------------------------- /envs/mujoco-maze/tests/test_intersect.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from mujoco_maze.maze_env_utils import Line 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "l1, l2, p, ans", 9 | [ 10 | ((0.0, 0.0), (4.0, 4.0), (1.0, 3.0), 2.0 ** 0.5), 11 | ((-3.0, -3.0), (0.0, 1.0), (-3.0, 1.0), 2.4), 12 | ], 13 | ) 14 | def test_distance(l1, l2, p, ans): 15 | line = Line(l1, l2) 16 | point = complex(*p) 17 | assert abs(line.distance(point) - ans) <= 1e-8 18 | 19 | 20 | @pytest.mark.parametrize( 21 | "l1p1, l1p2, l2p1, l2p2, none", 22 | [ 23 | ((0.0, 0.0), (1.0, 0.0), (0.0, -1.0), (1.0, 1.0), False), 24 | ((1.0, 1.0), (2.0, 3.0), (-1.0, 1.5), (1.5, 1.0), False), 25 | ((1.5, 1.5), (2.0, 3.0), (-1.0, 1.5), (1.5, 1.0), True), 26 | ((0.0, 0.0), (2.0, 0.0), (1.0, 0.0), (1.0, 3.0), False), 27 | ], 28 | ) 29 | def test_intersect(l1p1, l1p2, l2p1, l2p2, none): 30 | l1 = Line(l1p1, l1p2) 31 | l2 = Line(l2p1, l2p2) 32 | i1 = l1.intersect(l2) 33 | i2 = line_intersect(l1p1, l1p2, l2p1, l2p2) 34 | if none: 35 | assert i1 is None and i2 is None 36 | else: 37 | assert i1 is not None 38 | i1 = np.array([i1.real, i1.imag]) 39 | np.testing.assert_array_almost_equal(i1, np.array(i2)) 40 | 41 | 42 | def line_intersect(pt1, pt2, ptA, ptB): 43 | """ 44 | Taken from https://www.cs.hmc.edu/ACM/lectures/intersections.html 45 | Returns the intersection of Line(pt1,pt2) and Line(ptA,ptB). 
46 | """ 47 | import math 48 | 49 | DET_TOLERANCE = 0.00000001 50 | 51 | # the first line is pt1 + r*(pt2-pt1) 52 | # in component form: 53 | x1, y1 = pt1 54 | x2, y2 = pt2 55 | dx1 = x2 - x1 56 | dy1 = y2 - y1 57 | 58 | # the second line is ptA + s*(ptB-ptA) 59 | x, y = ptA 60 | xB, yB = ptB 61 | dx = xB - x 62 | dy = yB - y 63 | 64 | DET = -dx1 * dy + dy1 * dx 65 | 66 | if math.fabs(DET) < DET_TOLERANCE: 67 | return None 68 | 69 | # now, the determinant should be OK 70 | DETinv = 1.0 / DET 71 | 72 | # find the scalar amount along the "self" segment 73 | r = DETinv * (-dy * (x - x1) + dx * (y - y1)) 74 | 75 | # find the scalar amount along the input line 76 | s = DETinv * (-dy1 * (x - x1) + dx1 * (y - y1)) 77 | 78 | # return the average of the two descriptions 79 | xi = (x1 + r * dx1 + x + s * dx) / 2.0 80 | yi = (y1 + r * dy1 + y + s * dy) / 2.0 81 | if r >= 0 and 0 <= s <= 1: 82 | return xi, yi 83 | else: 84 | return None 85 | -------------------------------------------------------------------------------- /helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/helpers/__init__.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.12.1 2 | torch-tb-profiler==0.3.1 3 | tensorboard==2.8.0 4 | tensorboard-data-server==0.6.1 5 | tensorboard-plugin-wit==1.8.1 6 | sklearn==0.0 7 | scikit-image==0.15.0 8 | scikit-learn==1.0.2 9 | scipy==1.8.0 10 | tqdm==4.63.0 11 | numpy==1.22.2 12 | mujoco-py==2.1.2.14 13 | matplotlib==3.5.1 14 | gpytorch==1.9.0 15 | protobuf==3.19.4 16 | stable_baselines3==1.4.1a1 17 | POT==0.9.0 18 | Cython==0.29.28 19 | setuptools==58.0.4 --------------------------------------------------------------------------------