├── .gitignore
├── LICENSE
├── README.md
├── envs
├── gym
│ ├── .github
│ │ ├── ISSUE_TEMPLATE
│ │ │ ├── bug.md
│ │ │ ├── proposal.md
│ │ │ └── question.md
│ │ ├── stale.yml
│ │ └── workflows
│ │ │ ├── build.yml
│ │ │ ├── lint_python.yml
│ │ │ └── pre-commit.yml
│ ├── .gitignore
│ ├── .pre-commit-config.yaml
│ ├── CODE_OF_CONDUCT.rst
│ ├── CONTRIBUTING.md
│ ├── LICENSE.md
│ ├── README.md
│ ├── bin
│ │ └── docker_entrypoint
│ ├── docs
│ │ ├── api.md
│ │ ├── creating_environments.md
│ │ ├── third_party_environments.md
│ │ ├── toy_text
│ │ │ ├── blackjack.md
│ │ │ ├── frozen_lake.md
│ │ │ └── taxi.md
│ │ ├── tutorials.md
│ │ └── wrappers.md
│ ├── gym
│ │ ├── __init__.py
│ │ ├── core.py
│ │ ├── envs
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── box2d
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bipedal_walker.py
│ │ │ │ ├── car_dynamics.py
│ │ │ │ ├── car_racing.py
│ │ │ │ ├── lunar_lander.py
│ │ │ │ └── parking_garage
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── bus.py
│ │ │ │ │ ├── race_car.py
│ │ │ │ │ ├── street_car.py
│ │ │ │ │ └── trike.py
│ │ │ ├── classic_control
│ │ │ │ ├── __init__.py
│ │ │ │ ├── acrobot.py
│ │ │ │ ├── assets
│ │ │ │ │ └── clockwise.png
│ │ │ │ ├── cartpole.py
│ │ │ │ ├── continuous_mountain_car.py
│ │ │ │ ├── mountain_car.py
│ │ │ │ ├── pendulum.py
│ │ │ │ └── rendering.py
│ │ │ ├── mujoco
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ant.py
│ │ │ │ ├── ant_v3.py
│ │ │ │ ├── assets
│ │ │ │ │ ├── ant.xml
│ │ │ │ │ ├── half_cheetah.xml
│ │ │ │ │ ├── hopper.xml
│ │ │ │ │ ├── humanoid.xml
│ │ │ │ │ ├── humanoidstandup.xml
│ │ │ │ │ ├── inverted_double_pendulum.xml
│ │ │ │ │ ├── inverted_pendulum.xml
│ │ │ │ │ ├── point.xml
│ │ │ │ │ ├── pusher.xml
│ │ │ │ │ ├── reacher.xml
│ │ │ │ │ ├── striker.xml
│ │ │ │ │ ├── swimmer.xml
│ │ │ │ │ ├── thrower.xml
│ │ │ │ │ └── walker2d.xml
│ │ │ │ ├── half_cheetah.py
│ │ │ │ ├── half_cheetah_v3.py
│ │ │ │ ├── hopper.py
│ │ │ │ ├── hopper_v3.py
│ │ │ │ ├── humanoid.py
│ │ │ │ ├── humanoid_v3.py
│ │ │ │ ├── humanoidstandup.py
│ │ │ │ ├── inverted_double_pendulum.py
│ │ │ │ ├── inverted_pendulum.py
│ │ │ │ ├── mujoco_env.py
│ │ │ │ ├── pusher.py
│ │ │ │ ├── reacher.py
│ │ │ │ ├── striker.py
│ │ │ │ ├── swimmer.py
│ │ │ │ ├── swimmer_v3.py
│ │ │ │ ├── thrower.py
│ │ │ │ ├── walker2d.py
│ │ │ │ └── walker2d_v3.py
│ │ │ ├── registration.py
│ │ │ ├── robotics
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── assets
│ │ │ │ │ ├── LICENSE.md
│ │ │ │ │ ├── fetch
│ │ │ │ │ │ ├── pick_and_place.xml
│ │ │ │ │ │ ├── push.xml
│ │ │ │ │ │ ├── reach.xml
│ │ │ │ │ │ ├── robot.xml
│ │ │ │ │ │ ├── shared.xml
│ │ │ │ │ │ └── slide.xml
│ │ │ │ │ ├── hand
│ │ │ │ │ │ ├── manipulate_block.xml
│ │ │ │ │ │ ├── manipulate_block_touch_sensors.xml
│ │ │ │ │ │ ├── manipulate_egg.xml
│ │ │ │ │ │ ├── manipulate_egg_touch_sensors.xml
│ │ │ │ │ │ ├── manipulate_pen.xml
│ │ │ │ │ │ ├── manipulate_pen_touch_sensors.xml
│ │ │ │ │ │ ├── reach.xml
│ │ │ │ │ │ ├── robot.xml
│ │ │ │ │ │ ├── robot_touch_sensors_92.xml
│ │ │ │ │ │ ├── shared.xml
│ │ │ │ │ │ ├── shared_asset.xml
│ │ │ │ │ │ └── shared_touch_sensors_92.xml
│ │ │ │ │ ├── stls
│ │ │ │ │ │ ├── .get
│ │ │ │ │ │ ├── fetch
│ │ │ │ │ │ │ ├── base_link_collision.stl
│ │ │ │ │ │ │ ├── bellows_link_collision.stl
│ │ │ │ │ │ │ ├── elbow_flex_link_collision.stl
│ │ │ │ │ │ │ ├── estop_link.stl
│ │ │ │ │ │ │ ├── forearm_roll_link_collision.stl
│ │ │ │ │ │ │ ├── gripper_link.stl
│ │ │ │ │ │ │ ├── head_pan_link_collision.stl
│ │ │ │ │ │ │ ├── head_tilt_link_collision.stl
│ │ │ │ │ │ │ ├── l_wheel_link_collision.stl
│ │ │ │ │ │ │ ├── laser_link.stl
│ │ │ │ │ │ │ ├── r_wheel_link_collision.stl
│ │ │ │ │ │ │ ├── shoulder_lift_link_collision.stl
│ │ │ │ │ │ │ ├── shoulder_pan_link_collision.stl
│ │ │ │ │ │ │ ├── torso_fixed_link.stl
│ │ │ │ │ │ │ ├── torso_lift_link_collision.stl
│ │ │ │ │ │ │ ├── upperarm_roll_link_collision.stl
│ │ │ │ │ │ │ ├── wrist_flex_link_collision.stl
│ │ │ │ │ │ │ └── wrist_roll_link_collision.stl
│ │ │ │ │ │ └── hand
│ │ │ │ │ │ │ ├── F1.stl
│ │ │ │ │ │ │ ├── F2.stl
│ │ │ │ │ │ │ ├── F3.stl
│ │ │ │ │ │ │ ├── TH1_z.stl
│ │ │ │ │ │ │ ├── TH2_z.stl
│ │ │ │ │ │ │ ├── TH3_z.stl
│ │ │ │ │ │ │ ├── forearm_electric.stl
│ │ │ │ │ │ │ ├── forearm_electric_cvx.stl
│ │ │ │ │ │ │ ├── knuckle.stl
│ │ │ │ │ │ │ ├── lfmetacarpal.stl
│ │ │ │ │ │ │ ├── palm.stl
│ │ │ │ │ │ │ └── wrist.stl
│ │ │ │ │ └── textures
│ │ │ │ │ │ ├── block.png
│ │ │ │ │ │ └── block_hidden.png
│ │ │ │ ├── fetch
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── pick_and_place.py
│ │ │ │ │ ├── push.py
│ │ │ │ │ ├── reach.py
│ │ │ │ │ └── slide.py
│ │ │ │ ├── fetch_env.py
│ │ │ │ ├── hand
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── manipulate.py
│ │ │ │ │ ├── manipulate_touch_sensors.py
│ │ │ │ │ └── reach.py
│ │ │ │ ├── hand_env.py
│ │ │ │ ├── robot_env.py
│ │ │ │ ├── rotations.py
│ │ │ │ └── utils.py
│ │ │ ├── toy_text
│ │ │ │ ├── __init__.py
│ │ │ │ ├── blackjack.py
│ │ │ │ ├── cliffwalking.py
│ │ │ │ ├── discrete.py
│ │ │ │ ├── frozen_lake.py
│ │ │ │ └── taxi.py
│ │ │ └── unittest
│ │ │ │ ├── __init__.py
│ │ │ │ ├── cube_crash.py
│ │ │ │ └── memorize_digits.py
│ │ ├── error.py
│ │ ├── logger.py
│ │ ├── spaces
│ │ │ ├── __init__.py
│ │ │ ├── box.py
│ │ │ ├── dict.py
│ │ │ ├── discrete.py
│ │ │ ├── multi_binary.py
│ │ │ ├── multi_discrete.py
│ │ │ ├── space.py
│ │ │ ├── tuple.py
│ │ │ └── utils.py
│ │ ├── utils
│ │ │ ├── __init__.py
│ │ │ ├── atomic_write.py
│ │ │ ├── closer.py
│ │ │ ├── colorize.py
│ │ │ ├── env_checker.py
│ │ │ ├── ezpickle.py
│ │ │ ├── json_utils.py
│ │ │ ├── play.py
│ │ │ └── seeding.py
│ │ ├── vector
│ │ │ ├── __init__.py
│ │ │ ├── async_vector_env.py
│ │ │ ├── sync_vector_env.py
│ │ │ ├── utils
│ │ │ │ ├── __init__.py
│ │ │ │ ├── misc.py
│ │ │ │ ├── numpy_utils.py
│ │ │ │ ├── shared_memory.py
│ │ │ │ └── spaces.py
│ │ │ └── vector_env.py
│ │ ├── version.py
│ │ └── wrappers
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── atari_preprocessing.py
│ │ │ ├── clip_action.py
│ │ │ ├── filter_observation.py
│ │ │ ├── flatten_observation.py
│ │ │ ├── frame_stack.py
│ │ │ ├── gray_scale_observation.py
│ │ │ ├── monitor.py
│ │ │ ├── monitoring
│ │ │ ├── __init__.py
│ │ │ ├── stats_recorder.py
│ │ │ └── video_recorder.py
│ │ │ ├── normalize.py
│ │ │ ├── order_enforcing.py
│ │ │ ├── pixel_observation.py
│ │ │ ├── record_episode_statistics.py
│ │ │ ├── record_video.py
│ │ │ ├── rescale_action.py
│ │ │ ├── resize_observation.py
│ │ │ ├── time_aware_observation.py
│ │ │ ├── time_limit.py
│ │ │ ├── transform_observation.py
│ │ │ └── transform_reward.py
│ ├── py.Dockerfile
│ ├── requirements.txt
│ ├── scripts
│ │ └── generate_json.py
│ ├── setup.py
│ ├── test_requirements.txt
│ └── tests
│ │ ├── __init__.py
│ │ ├── envs
│ │ ├── __init__.py
│ │ ├── robotics
│ │ │ ├── __init__.py
│ │ │ └── hand
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_manipulate.py
│ │ │ │ ├── test_manipulate_touch_sensors.py
│ │ │ │ └── test_reach.py
│ │ ├── rollout.json
│ │ ├── spec_list.py
│ │ ├── test_atari_legacy_env_specs.py
│ │ ├── test_determinism.py
│ │ ├── test_envs.py
│ │ ├── test_envs_semantics.py
│ │ ├── test_frozenlake_dfs.py
│ │ ├── test_lunar_lander.py
│ │ ├── test_mujoco_v2_to_v3_conversion.py
│ │ └── test_registration.py
│ │ ├── spaces
│ │ ├── __init__.py
│ │ ├── test_spaces.py
│ │ └── test_utils.py
│ │ ├── test_core.py
│ │ ├── utils
│ │ ├── __init__.py
│ │ ├── test_atexit.py
│ │ ├── test_env_checker.py
│ │ └── test_seeding.py
│ │ ├── vector
│ │ ├── __init__.py
│ │ ├── test_async_vector_env.py
│ │ ├── test_numpy_utils.py
│ │ ├── test_shared_memory.py
│ │ ├── test_spaces.py
│ │ ├── test_sync_vector_env.py
│ │ ├── test_vector_env.py
│ │ ├── test_vector_env_wrapper.py
│ │ └── utils.py
│ │ └── wrappers
│ │ ├── __init__.py
│ │ ├── flatten_test.py
│ │ ├── monitoring
│ │ ├── __init__.py
│ │ ├── helpers.py
│ │ └── test_video_recorder.py
│ │ ├── nested_dict_test.py
│ │ ├── test_atari_preprocessing.py
│ │ ├── test_clip_action.py
│ │ ├── test_filter_observation.py
│ │ ├── test_flatten_observation.py
│ │ ├── test_frame_stack.py
│ │ ├── test_gray_scale_observation.py
│ │ ├── test_normalize.py
│ │ ├── test_pixel_observation.py
│ │ ├── test_record_episode_statistics.py
│ │ ├── test_record_video.py
│ │ ├── test_rescale_action.py
│ │ ├── test_resize_observation.py
│ │ ├── test_time_aware_observation.py
│ │ ├── test_transform_observation.py
│ │ └── test_transform_reward.py
└── mujoco-maze
│ ├── .github
│ └── workflows
│ │ └── main.yml
│ ├── .gitignore
│ ├── LICENSE
│ ├── MANIFEST.in
│ ├── README.md
│ ├── mujoco_maze
│ ├── __init__.py
│ ├── agent_model.py
│ ├── ant.py
│ ├── assets
│ │ ├── ant.xml
│ │ ├── point.xml
│ │ ├── reacher.xml
│ │ └── swimmer.xml
│ ├── maze_env.py
│ ├── maze_env_utils.py
│ ├── maze_task.py
│ ├── point.py
│ ├── reacher.py
│ ├── swimmer.py
│ └── websock_viewer.py
│ ├── poetry.lock
│ ├── pyproject.toml
│ ├── screenshots
│ ├── AntFall.png
│ ├── AntPush.png
│ ├── Point4Rooms.png
│ ├── PointBilliard.png
│ ├── PointCorridor.png
│ ├── PointUMaze.png
│ ├── SwimmerSquareRoom.png
│ └── WebVis.png
│ ├── setup.cfg
│ ├── setup.py
│ └── tests
│ ├── test_envs.py
│ └── test_intersect.py
├── helpers
├── __init__.py
├── bary_utils.py
├── custom_callback.py
├── monitor.py
├── utils.py
└── w_encode.py
├── requirements.txt
└── run_maze_continuous.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Peide Huang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GRADIENT: Curriculum Reinforcement Learning using Optimal Transport via Gradual Domain Adaptation
2 |
3 | ## Paper
4 | Huang P, Xu M, Zhu J, Shi L, Fang F, Zhao D. Curriculum reinforcement learning using optimal transport via gradual domain adaptation. Advances in Neural Information Processing Systems. 2022 Dec 6;35:10656-70. https://arxiv.org/abs/2210.10195
5 |
6 | ## Install dependencies
7 | ```
8 | conda create --name gradient python=3.8.12
9 | conda activate gradient && pip install -r requirements.txt
10 | ```
11 | ```
12 | cd envs/gym && pip install -e .
13 | ```
14 | ```
15 | cd envs/mujoco-maze && pip install -e .
16 | ```
17 |
18 | ## Environments
19 | - The environments are modified from mujoco-maze (https://github.com/kngwyu/mujoco-maze) and OpenAI Gym (https://github.com/openai/gym).
20 |
21 | ## Code Usage
22 | ```
23 | python run_maze_continuous.py --curriculum gradient --interp_metric encoding --num_stage 5 --reward_threshold 0.5
24 | python run_maze_continuous.py --curriculum gradient --interp_metric l2 --num_stage 5 --reward_threshold 0.5
25 | ```
--------------------------------------------------------------------------------
/envs/gym/.github/ISSUE_TEMPLATE/bug.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug Report
3 | about: Submit a bug report
4 | title: "[Bug Report] Bug title"
5 |
6 | ---
7 |
8 | If you are submitting a bug report, please fill in the following details and use the tag [bug].
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **Code example**
14 | Please try to provide a minimal example to reproduce the bug. Error messages and stack traces are also helpful.
15 |
16 | **System Info**
17 | Describe the characteristics of your environment:
18 | * Describe how Gym was installed (pip, docker, source, ...)
19 | * What OS/version of Linux you're using. Note that while we will accept PRs to improve Windows support, we do not officially support it.
20 | * Python version
21 |
22 | **Additional context**
23 | Add any other context about the problem here.
24 |
25 | ### Checklist
26 |
27 | - [ ] I have checked that there is no similar [issue](https://github.com/openai/gym/issues) in the repo (**required**)
28 |
--------------------------------------------------------------------------------
/envs/gym/.github/ISSUE_TEMPLATE/proposal.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Proposal
3 | about: Propose changes that are not bug fixes
4 | title: "[Proposal] Proposal title"
5 | ---
6 |
7 |
8 |
9 | ### Proposal
10 |
11 | A clear and concise description of the proposal.
12 |
13 | ### Motivation
14 |
15 | Please outline the motivation for the proposal.
16 | Is your feature request related to a problem? e.g.,"I'm always frustrated when [...]".
17 | If this is related to another GitHub issue, please link here too.
18 |
19 | ### Pitch
20 |
21 | A clear and concise description of what you want to happen.
22 |
23 | ### Alternatives
24 |
25 | A clear and concise description of any alternative solutions or features you've considered, if any.
26 |
27 | ### Additional context
28 |
29 | Add any other context or screenshots about the feature request here.
30 |
31 | ### Checklist
32 |
33 | - [ ] I have checked that there is no similar [issue](https://github.com/openai/gym/issues) in the repo (**required**)
34 |
--------------------------------------------------------------------------------
/envs/gym/.github/ISSUE_TEMPLATE/question.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Question
3 | about: Ask a question
4 | title: "[Question] Question title"
5 | ---
6 |
7 |
8 | ### Question
9 |
10 | If you're a beginner and have basic questions, please ask on [r/reinforcementlearning](https://www.reddit.com/r/reinforcementlearning/) or in the [RL Discord](https://discord.com/invite/xhfNqQv) (if you're new please use the beginners channel). Basic questions that are not bugs or feature requests will be closed without reply, because GitHub issues are not an appropriate venue for these.
11 |
12 | Advanced/nontrivial questions, especially in areas where documentation is lacking, are very much welcome.
13 |
--------------------------------------------------------------------------------
/envs/gym/.github/stale.yml:
--------------------------------------------------------------------------------
1 | # Configuration for probot-stale - https://github.com/probot/stale
2 |
3 | # Number of days of inactivity before an Issue or Pull Request becomes stale
4 | daysUntilStale: 60
5 |
6 | # Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
7 | # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
8 | daysUntilClose: 14
9 |
10 | # Only issues or pull requests with all of these labels are checked for staleness. Defaults to `[]` (disabled)
11 | onlyLabels:
12 | - more-information-needed
13 |
14 | # Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
15 | exemptLabels:
16 | - pinned
17 | - security
18 | - "[Status] Maybe Later"
19 |
20 | # Set to true to ignore issues in a project (defaults to false)
21 | exemptProjects: true
22 |
23 | # Set to true to ignore issues in a milestone (defaults to false)
24 | exemptMilestones: true
25 |
26 | # Set to true to ignore issues with an assignee (defaults to false)
27 | exemptAssignees: true
28 |
29 | # Label to use when marking as stale
30 | staleLabel: stale
31 |
32 | # Comment to post when marking as stale. Set to `false` to disable
33 | markComment: >
34 | This issue has been automatically marked as stale because it has not had
35 | recent activity. It will be closed if no further activity occurs. Thank you
36 | for your contributions.
37 |
38 | # Comment to post when removing the stale label.
39 | # unmarkComment: >
40 | # Your comment here.
41 |
42 | # Comment to post when closing a stale Issue or Pull Request.
43 | # closeComment: >
44 | # Your comment here.
45 |
46 | # Limit the number of actions per hour, from 1-30. Default is 30
47 | limitPerRun: 30
48 |
49 | # Limit to only `issues` or `pulls`
50 | only: issues
51 |
52 | # Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
53 | # pulls:
54 | # daysUntilStale: 30
55 | # markComment: >
56 | # This pull request has been automatically marked as stale because it has not had
57 | # recent activity. It will be closed if no further activity occurs. Thank you
58 | # for your contributions.
59 |
60 | # issues:
61 | # exemptLabels:
62 | # - confirmed
--------------------------------------------------------------------------------
/envs/gym/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: build
2 | on: [pull_request, push]
3 |
4 | jobs:
5 | build:
6 | runs-on: ubuntu-latest
7 | strategy:
8 | matrix:
9 | python-version: [3.7, 3.8, 3.9]
10 | steps:
11 | - uses: actions/checkout@v2
12 | - run: |
13 | docker build -f py.Dockerfile \
14 | --build-arg MUJOCO_KEY=$MUJOCO_KEY \
15 | --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
16 | --tag gym-docker .
17 | - name: Run tests
18 | run: docker run gym-docker pytest --forked --import-mode=append
19 |
--------------------------------------------------------------------------------
/envs/gym/.github/workflows/lint_python.yml:
--------------------------------------------------------------------------------
1 | name: lint_python
2 | on: [pull_request, push]
3 | jobs:
4 | lint_python:
5 | runs-on: ubuntu-latest
6 | steps:
7 | - uses: actions/checkout@v2
8 | - uses: actions/setup-python@v2
9 | - run: pip install isort mypy pytest pyupgrade safety
10 | - run: isort --check-only --profile black . || true
11 | - run: pip install -e .[nomujoco]
12 | - run: mypy --install-types --non-interactive . || true
13 | - run: pytest . || true
14 | - run: pytest --doctest-modules . || true
15 | - run: shopt -s globstar && pyupgrade --py36-plus **/*.py || true
16 |
--------------------------------------------------------------------------------
/envs/gym/.github/workflows/pre-commit.yml:
--------------------------------------------------------------------------------
1 | # https://pre-commit.com
2 | # This GitHub Action assumes that the repo contains a valid .pre-commit-config.yaml file.
3 | name: pre-commit
4 | on:
5 | pull_request:
6 | push:
7 | branches: [master]
8 | jobs:
9 | pre-commit:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v2
13 | - uses: actions/setup-python@v2
14 | - run: pip install pre-commit
15 | - run: pre-commit --version
16 | - run: pre-commit install
17 | - run: pre-commit run --all-files
18 |
--------------------------------------------------------------------------------
/envs/gym/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.pyc
3 | *.py~
4 | .DS_Store
5 | .cache
6 | .pytest_cache/
7 |
8 | # Setuptools distribution and build folders.
9 | /dist/
10 | /build
11 |
12 | # Virtualenv
13 | /env
14 |
15 | # Python egg metadata, regenerated from source files by setuptools.
16 | /*.egg-info
17 |
18 | *.sublime-project
19 | *.sublime-workspace
20 |
21 | logs/
22 |
23 | .ipynb_checkpoints
24 | ghostdriver.log
25 |
26 | junk
27 | MUJOCO_LOG.txt
28 |
29 | rllab_mujoco
30 |
31 | tutorial/*.html
32 |
33 | # IDE files
34 | .eggs
35 | .tox
36 |
37 | # PyCharm project files
38 | .idea
39 | vizdoom.ini
40 |
--------------------------------------------------------------------------------
/envs/gym/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/PyCQA/bandit/
3 | rev: 1.7.0
4 | hooks:
5 | - id: bandit
6 | args:
7 | - --recursive
8 | - --skip
9 | - B101,B108,B301,B403,B404,B603
10 | - .
11 | - repo: https://github.com/python/black
12 | rev: 21.7b0
13 | hooks:
14 | - id: black
15 | - repo: https://github.com/codespell-project/codespell
16 | rev: v2.1.0
17 | hooks:
18 | - id: codespell
19 | args:
20 | - --ignore-words-list=nd,reacher,thist,ths
21 | - repo: https://gitlab.com/pycqa/flake8
22 | rev: 3.9.2
23 | hooks:
24 | - id: flake8
25 | args:
26 | - --ignore=E203,E402,E712,E722,E731,E741,F401,F403,F405,F524,F841,W503
27 | - --max-complexity=30
28 | - --max-line-length=456
29 | - --show-source
30 | - --statistics
31 |
--------------------------------------------------------------------------------
/envs/gym/CODE_OF_CONDUCT.rst:
--------------------------------------------------------------------------------
1 | OpenAI Gym is dedicated to providing a harassment-free experience for
2 | everyone, regardless of gender, gender identity and expression, sexual
3 | orientation, disability, physical appearance, body size, age, race, or
4 | religion. We do not tolerate harassment of participants in any form.
5 |
6 | This code of conduct applies to all OpenAI Gym spaces (including Gist
7 | comments) both online and off. Anyone who violates this code of
8 | conduct may be sanctioned or expelled from these spaces at the
9 | discretion of the OpenAI team.
10 |
11 | We may add additional rules over time, which will be made clearly
12 | available to participants. Participants are responsible for knowing
13 | and abiding by these rules.
14 |
--------------------------------------------------------------------------------
/envs/gym/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Gym Contribution Guidelines
2 |
3 | At this time we are accepting the following forms of contributions:
4 |
5 | - Bug reports (keep in mind that changing environment behavior should be minimized as that requires releasing a new version of the environment and makes results hard to compare across versions)
6 | - Pull requests for bug fixes
7 | - Documentation improvements
8 |
9 | Notably, we are not accepting these forms of contributions:
10 |
11 | - New environments
12 | - New features
13 |
14 | This may change in the future.
15 | If you wish to make a Gym environment, follow the instructions in [Creating Environments](https://github.com/openai/gym/blob/master/docs/creating-environments.md). When your environment works, you can make a PR to add it to the bottom of the [List of Environments](https://github.com/openai/gym/blob/master/docs/environments.md).
16 |
17 |
18 | Edit July 27, 2021: Please see https://github.com/openai/gym/issues/2259 for new contributing standards
19 |
--------------------------------------------------------------------------------
/envs/gym/LICENSE.md:
--------------------------------------------------------------------------------
1 | # gym
2 |
3 | The MIT License
4 |
5 | Copyright (c) 2016 OpenAI (https://openai.com)
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy
8 | of this software and associated documentation files (the "Software"), to deal
9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be included in
15 | all copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | THE SOFTWARE.
24 |
25 | # Mujoco models
26 | This work is derived from [MuJoCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license:
27 | ```
28 | This file is part of MuJoCo.
29 | Copyright 2009-2015 Roboti LLC.
30 | Mujoco :: Advanced physics simulation engine
31 | Source : www.roboti.us
32 | Version : 1.31
33 | Released : 23Apr16
34 | Author :: Vikash Kumar
35 | Contacts : kumar@roboti.us
36 | ```
37 |
--------------------------------------------------------------------------------
/envs/gym/bin/docker_entrypoint:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # This script is the entrypoint for our Docker image.
3 |
4 | set -ex
5 |
6 | # Set up display; otherwise rendering will fail
7 | Xvfb -screen 0 1024x768x24 &
8 | export DISPLAY=:0
9 |
10 | # Wait for the file to come up
11 | display=0
12 | file="/tmp/.X11-unix/X$display"
13 | for i in $(seq 1 10); do
14 | if [ -e "$file" ]; then
15 | break
16 | fi
17 |
18 | echo "Waiting for $file to be created (try $i/10)"
19 | sleep "$i"
20 | done
21 | if ! [ -e "$file" ]; then
22 | echo "Timing out: $file was not created"
23 | exit 1
24 | fi
25 |
26 | exec "$@"
27 |
--------------------------------------------------------------------------------
/envs/gym/docs/creating_environments.md:
--------------------------------------------------------------------------------
1 | # How to create new environments for Gym
2 |
3 | * Create a new repo called gym-foo, which should also be a pip package.
4 |
5 | * A good example is https://github.com/openai/gym-soccer.
6 |
7 | * It should have at least the following files:
8 | ```sh
9 | gym-foo/
10 | README.md
11 | setup.py
12 | gym_foo/
13 | __init__.py
14 | envs/
15 | __init__.py
16 | foo_env.py
17 | foo_extrahard_env.py
18 | ```
19 |
20 | * `gym-foo/setup.py` should have:
21 |
22 | ```python
23 | from setuptools import setup
24 |
25 | setup(name='gym_foo',
26 | version='0.0.1',
27 | install_requires=['gym'] # And any other dependencies foo needs
28 | )
29 | ```
30 |
31 | * `gym-foo/gym_foo/__init__.py` should have:
32 | ```python
33 | from gym.envs.registration import register
34 |
35 | register(
36 | id='foo-v0',
37 | entry_point='gym_foo.envs:FooEnv',
38 | )
39 | register(
40 | id='foo-extrahard-v0',
41 | entry_point='gym_foo.envs:FooExtraHardEnv',
42 | )
43 | ```
44 |
45 | * `gym-foo/gym_foo/envs/__init__.py` should have:
46 | ```python
47 | from gym_foo.envs.foo_env import FooEnv
48 | from gym_foo.envs.foo_extrahard_env import FooExtraHardEnv
49 | ```
50 |
51 | * `gym-foo/gym_foo/envs/foo_env.py` should look something like:
52 | ```python
53 | import gym
54 | from gym import error, spaces, utils
55 | from gym.utils import seeding
56 |
57 | class FooEnv(gym.Env):
58 | metadata = {'render.modes': ['human']}
59 |
60 | def __init__(self):
61 | ...
62 | def step(self, action):
63 | ...
64 | def reset(self):
65 | ...
66 | def render(self, mode='human'):
67 | ...
68 | def close(self):
69 | ...
70 | ```
71 |
72 | * After you have installed your package with `pip install -e gym-foo`, you can create an instance of the environment with `gym.make('gym_foo:foo-v0')`
73 |
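A minimal interaction sketch with the registered environment (illustrative only; it assumes `FooEnv` defines `action_space` and `observation_space`, and uses the classic Gym API where `reset()` returns an observation and `step()` returns a 4-tuple):

```python
import gym

# 'gym_foo:foo-v0' tells gym.make to import gym_foo first,
# which runs the register() calls shown above.
env = gym.make('gym_foo:foo-v0')

obs = env.reset()
for _ in range(100):
    action = env.action_space.sample()          # stand-in for a real policy
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()
env.close()
```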
--------------------------------------------------------------------------------
/envs/gym/docs/toy_text/blackjack.md:
--------------------------------------------------------------------------------
1 | Blackjack
2 | ---
3 | |Title|Action Type|Action Shape|Action Values|Observation Shape|Observation Values|Average Total Reward|Import|
4 | | ----------- | -----------| ----------- | -----------| ----------- | -----------| ----------- | -----------|
5 | |Blackjack|Discrete|(1,)|(0,1)|(3,)|[(0,31),(0,10),(0,1)]| |from gym.envs.toy_text import blackjack|
6 | ---
7 |
8 | Blackjack is a card game in which the goal is to obtain cards that sum to as close to 21 as possible without going over. The player plays against a fixed dealer.
9 |
10 | Card Values:
11 |
12 | - Face cards (Jack, Queen, King) have a point value of 10.
13 | - An ace can count as either 11 or 1; it is called a 'usable ace' when counted as 11.
14 | - Numerical cards (2-9) have a value equal to their number.
15 |
16 | This game is played with an infinite deck (i.e. cards are drawn with replacement).
17 | The game starts with the dealer having one face-up and one face-down card, while the player has two face-up cards.
18 |
19 | The player can request additional cards (hit, action=1) until they decide to stop
20 | (stick, action=0) or exceed 21 (bust).
21 | After the player sticks, the dealer reveals their facedown card, and draws
22 | until their sum is 17 or greater. If the dealer goes bust the player wins.
23 | If neither player nor dealer busts, the outcome (win, lose, draw) is
24 | decided by whose sum is closer to 21.
25 |
26 | The agent takes a 1-element vector for actions.
27 | The action space is `(action)`, where:
28 | - `action` decides whether to stick (0) or hit (1).
29 |
30 | The observation is a 3-tuple of: the player's current sum,
31 | the dealer's single showing card (1-10, where 1 is an ace), and whether or not the player holds a usable ace (0 or 1).
32 |
33 | This environment corresponds to the version of the blackjack problem
34 | described in Example 5.1 in Reinforcement Learning: An Introduction
35 | by Sutton and Barto.
36 | http://incompleteideas.net/book/the-book-2nd.html
37 |
38 | **Rewards:**
39 |
40 | Reward schedule:
41 | - win game: +1
42 | - lose game: -1
43 | - draw game: 0
44 | - win game with natural blackjack:
45 |
46 | +1.5 (if natural is True.)
47 |
48 | +1 (if natural is False.)
49 |
50 | ### Arguments
51 |
52 | ```
53 | gym.make('Blackjack-v0', natural=False)
54 | ```
55 |
56 | `natural`: Whether to give an additional reward for starting with a natural blackjack, i.e. starting with an ace and ten (sum is 21).
57 |
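A short usage sketch (illustrative, not part of the upstream docs), using the spaces described above: an observation is `(player_sum, dealer_card, usable_ace)` and the action is 0 (stick) or 1 (hit).

```
import gym

env = gym.make('Blackjack-v0', natural=False)
obs = env.reset()                        # e.g. (14, 10, False)
done = False
while not done:
    action = env.action_space.sample()   # 0 = stick, 1 = hit
    obs, reward, done, info = env.step(action)
print(obs, reward)                       # reward: +1 win, 0 draw, -1 loss (+1.5 for a natural win if natural=True)
```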
58 | ### Version History
59 |
60 | * v0: Initial versions release (1.0.0)
61 |
--------------------------------------------------------------------------------
/envs/gym/docs/toy_text/frozen_lake.md:
--------------------------------------------------------------------------------
1 | Frozen Lake
2 | ---
3 | |Title|Action Type|Action Shape|Action Values|Observation Shape|Observation Values|Average Total Reward|Import|
4 | | ----------- | -----------| ----------- | -----------| ----------- | -----------| ----------- | -----------|
5 | |Frozen Lake|Discrete|(1,)|(0,3)|(1,)|(0,nrows*ncolumns)| |from gym.envs.toy_text import frozen_lake|
6 | ---
7 |
8 |
9 | Frozen Lake involves crossing a frozen lake from the start (S) to the goal (G) without falling into any holes (H). The agent may not always move in the intended direction due to the slippery nature of the frozen lake.
10 |
11 | The agent takes a 1-element vector for actions.
12 | The action space is `(dir)`, where `dir` is the direction to move in and can be:
13 |
14 | - 0: LEFT
15 | - 1: DOWN
16 | - 2: RIGHT
17 | - 3: UP
18 |
19 | The observation is a value representing the agent's current position as
20 |
21 |     current_row * ncols + current_col
22 |
23 | **Rewards:**
24 |
25 | Reward schedule:
26 | - Reach goal (G): +1
27 | - Reach hole (H): 0
28 |
29 | ### Arguments
30 |
31 | ```
32 | gym.make('FrozenLake-v0', desc=None, map_name="4x4", is_slippery=True)
33 | ```
34 |
35 | `desc`: Used to specify custom map for frozen lake. For example,
36 |
37 | desc=["SFFF", "FHFH", "FFFH", "HFFG"].
38 |
39 | `map_name`: ID to use any of the preloaded maps.
40 |
41 | "4x4":[
42 | "SFFF",
43 | "FHFH",
44 | "FFFH",
45 | "HFFG"
46 | ]
47 |
48 | "8x8": [
49 | "SFFFFFFF",
50 | "FFFFFFFF",
51 | "FFFHFFFF",
52 | "FFFFFHFF",
53 | "FFFHFFFF",
54 | "FHHFFFHF",
55 | "FHFFHFHF",
56 | "FFFHFFFG",
57 | ]
58 |
59 |
60 |
61 |
62 | `is_slippery`: True/False. If True, the agent moves in the intended direction with probability 1/3, and in each of the two perpendicular directions with probability 1/3.
63 |
64 | For example, if the action is left and is_slippery is True, then:
65 | - P(move left) = 1/3
66 | - P(move up) = 1/3
67 | - P(move down) = 1/3
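
A short usage sketch (illustrative, assuming the API described above), where actions are the integers 0-3 and the observation is the flattened grid index described earlier:

```
import gym

env = gym.make('FrozenLake-v0', map_name="4x4", is_slippery=True)
obs = env.reset()                       # start state, index 0
obs, reward, done, info = env.step(2)   # try to move RIGHT; may slip to a perpendicular direction
env.render()
```
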
68 | ### Version History
69 |
70 | * v0: Initial versions release (1.0.0)
71 |
--------------------------------------------------------------------------------
/envs/gym/docs/tutorials.md:
--------------------------------------------------------------------------------
1 | ## Getting Started With OpenAI Gym: The Basic Building Blocks
2 |
3 | https://blog.paperspace.com/getting-started-with-openai-gym/
4 |
5 | A good starting point explaining all the basic building blocks of the Gym API.
6 |
7 |
8 |
9 | ## Reinforcement Q-Learning from Scratch in Python with OpenAI Gym
10 | A good algorithmic introduction to reinforcement learning, showcasing how to use the Gym API for training agents.
11 |
12 | https://www.learndatasci.com/tutorials/reinforcement-q-learning-scratch-python-openai-gym/
13 |
14 |
15 | ## Tutorial: An Introduction to Reinforcement Learning Using OpenAI Gym
16 |
17 | https://www.gocoder.one/blog/rl-tutorial-with-openai-gym
18 |
--------------------------------------------------------------------------------
/envs/gym/gym/__init__.py:
--------------------------------------------------------------------------------
1 | import distutils.version
2 | import os
3 | import sys
4 | import warnings
5 |
6 | from gym import error
7 | from gym.version import VERSION as __version__
8 |
9 | from gym.core import (
10 | Env,
11 | GoalEnv,
12 | Wrapper,
13 | ObservationWrapper,
14 | ActionWrapper,
15 | RewardWrapper,
16 | )
17 | from gym.spaces import Space
18 | from gym.envs import make, spec, register
19 | from gym import logger
20 | from gym import vector
21 | from gym import wrappers
22 |
23 | __all__ = ["Env", "Space", "Wrapper", "make", "spec", "register"]
24 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/README.md:
--------------------------------------------------------------------------------
1 | # Envs
2 |
3 | These are the core integrated environments. Note that we may later
4 | restructure any of the files, but will keep the environments available
5 | at the relevant package's top-level. So for example, you should access
6 | `AntEnv` as follows:
7 |
8 | ```
9 | # Will be supported in future releases
10 | from gym.envs import mujoco
11 | mujoco.AntEnv
12 | ```
13 |
14 | Rather than:
15 |
16 | ```
17 | # May break in future releases
18 | from gym.envs.mujoco import ant
19 | ant.AntEnv
20 | ```
--------------------------------------------------------------------------------
/envs/gym/gym/envs/box2d/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 | import Box2D
3 | from gym.envs.box2d.lunar_lander import LunarLander
4 | from gym.envs.box2d.lunar_lander import LunarLanderContinuous
5 | from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
6 | from gym.envs.box2d.car_racing import CarRacing
7 | except ImportError:
8 | Box2D = None
9 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/box2d/parking_garage/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = "André Biedenkapp"
2 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/classic_control/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.classic_control.cartpole import CartPoleEnv
2 | from gym.envs.classic_control.mountain_car import MountainCarEnv
3 | from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv
4 | from gym.envs.classic_control.pendulum import PendulumEnv
5 | from gym.envs.classic_control.acrobot import AcrobotEnv
6 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/classic_control/assets/clockwise.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/classic_control/assets/clockwise.png
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.mujoco.mujoco_env import MujocoEnv
2 |
3 | # ^^^^^ so that user gets the correct error
4 | # message if mujoco is not installed correctly
5 | from gym.envs.mujoco.ant import AntEnv
6 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
7 | from gym.envs.mujoco.hopper import HopperEnv
8 | from gym.envs.mujoco.walker2d import Walker2dEnv
9 | from gym.envs.mujoco.humanoid import HumanoidEnv
10 | from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
11 | from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
12 | from gym.envs.mujoco.reacher import ReacherEnv
13 | from gym.envs.mujoco.swimmer import SwimmerEnv
14 | from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv
15 | from gym.envs.mujoco.pusher import PusherEnv
16 | from gym.envs.mujoco.thrower import ThrowerEnv
17 | from gym.envs.mujoco.striker import StrikerEnv
18 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/ant.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 |
6 | class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
7 | def __init__(self):
8 | mujoco_env.MujocoEnv.__init__(self, "ant.xml", 5)
9 | utils.EzPickle.__init__(self)
10 |
11 | def step(self, a):
12 | xposbefore = self.get_body_com("torso")[0]
13 | self.do_simulation(a, self.frame_skip)
14 | xposafter = self.get_body_com("torso")[0]
15 | forward_reward = (xposafter - xposbefore) / self.dt
16 | ctrl_cost = 0.5 * np.square(a).sum()
17 | contact_cost = (
18 | 0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
19 | )
20 | survive_reward = 1.0
21 | reward = forward_reward - ctrl_cost - contact_cost + survive_reward
22 | state = self.state_vector()
23 | notdone = np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0
24 | done = not notdone
25 | ob = self._get_obs()
26 | return (
27 | ob,
28 | reward,
29 | done,
30 | dict(
31 | reward_forward=forward_reward,
32 | reward_ctrl=-ctrl_cost,
33 | reward_contact=-contact_cost,
34 | reward_survive=survive_reward,
35 | ),
36 | )
37 |
38 | def _get_obs(self):
39 | return np.concatenate(
40 | [
41 | self.sim.data.qpos.flat[2:],
42 | self.sim.data.qvel.flat,
43 | np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
44 | ]
45 | )
46 |
47 | def reset_model(self):
48 | qpos = self.init_qpos + self.np_random.uniform(
49 | size=self.model.nq, low=-0.1, high=0.1
50 | )
51 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
52 | self.set_state(qpos, qvel)
53 | return self._get_obs()
54 |
55 | def viewer_setup(self):
56 | self.viewer.cam.distance = self.model.stat.extent * 0.5
57 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/assets/inverted_double_pendulum.xml:
--------------------------------------------------------------------------------
(inverted double pendulum MuJoCo model XML; the markup was not preserved in this text dump)
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/assets/inverted_pendulum.xml:
--------------------------------------------------------------------------------
(inverted pendulum MuJoCo model XML; the markup was not preserved in this text dump)
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/assets/point.xml:
--------------------------------------------------------------------------------
(point MuJoCo model XML; the markup was not preserved in this text dump)
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/assets/reacher.xml:
--------------------------------------------------------------------------------
(reacher MuJoCo model XML; the markup was not preserved in this text dump)
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/assets/swimmer.xml:
--------------------------------------------------------------------------------
(swimmer MuJoCo model XML; the markup was not preserved in this text dump)
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/half_cheetah.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 |
6 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
7 | def __init__(self):
8 | mujoco_env.MujocoEnv.__init__(self, "half_cheetah.xml", 5)
9 | utils.EzPickle.__init__(self)
10 |
11 | def step(self, action):
12 | xposbefore = self.sim.data.qpos[0]
13 | self.do_simulation(action, self.frame_skip)
14 | xposafter = self.sim.data.qpos[0]
15 | ob = self._get_obs()
16 | reward_ctrl = -0.1 * np.square(action).sum()
17 | reward_run = (xposafter - xposbefore) / self.dt
18 | reward = reward_ctrl + reward_run
19 | done = False
20 | return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
21 |
22 | def _get_obs(self):
23 | return np.concatenate(
24 | [
25 | self.sim.data.qpos.flat[1:],
26 | self.sim.data.qvel.flat,
27 | ]
28 | )
29 |
30 | def reset_model(self):
31 | qpos = self.init_qpos + self.np_random.uniform(
32 | low=-0.1, high=0.1, size=self.model.nq
33 | )
34 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
35 | self.set_state(qpos, qvel)
36 | return self._get_obs()
37 |
38 | def viewer_setup(self):
39 | self.viewer.cam.distance = self.model.stat.extent * 0.5
40 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/hopper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 |
6 | class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
7 | def __init__(self):
8 | mujoco_env.MujocoEnv.__init__(self, "hopper.xml", 4)
9 | utils.EzPickle.__init__(self)
10 |
11 | def step(self, a):
12 | posbefore = self.sim.data.qpos[0]
13 | self.do_simulation(a, self.frame_skip)
14 | posafter, height, ang = self.sim.data.qpos[0:3]
15 | alive_bonus = 1.0
16 | reward = (posafter - posbefore) / self.dt
17 | reward += alive_bonus
18 | reward -= 1e-3 * np.square(a).sum()
19 | s = self.state_vector()
20 | done = not (
21 | np.isfinite(s).all()
22 | and (np.abs(s[2:]) < 100).all()
23 | and (height > 0.7)
24 | and (abs(ang) < 0.2)
25 | )
26 | ob = self._get_obs()
27 | return ob, reward, done, {}
28 |
29 | def _get_obs(self):
30 | return np.concatenate(
31 | [self.sim.data.qpos.flat[1:], np.clip(self.sim.data.qvel.flat, -10, 10)]
32 | )
33 |
34 | def reset_model(self):
35 | qpos = self.init_qpos + self.np_random.uniform(
36 | low=-0.005, high=0.005, size=self.model.nq
37 | )
38 | qvel = self.init_qvel + self.np_random.uniform(
39 | low=-0.005, high=0.005, size=self.model.nv
40 | )
41 | self.set_state(qpos, qvel)
42 | return self._get_obs()
43 |
44 | def viewer_setup(self):
45 | self.viewer.cam.trackbodyid = 2
46 | self.viewer.cam.distance = self.model.stat.extent * 0.75
47 | self.viewer.cam.lookat[2] = 1.15
48 | self.viewer.cam.elevation = -20
49 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/humanoid.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym.envs.mujoco import mujoco_env
3 | from gym import utils
4 |
5 |
6 | def mass_center(model, sim):
7 | mass = np.expand_dims(model.body_mass, 1)
8 | xpos = sim.data.xipos
9 | return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
10 |
11 |
12 | class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
13 | def __init__(self):
14 | mujoco_env.MujocoEnv.__init__(self, "humanoid.xml", 5)
15 | utils.EzPickle.__init__(self)
16 |
17 | def _get_obs(self):
18 | data = self.sim.data
19 | return np.concatenate(
20 | [
21 | data.qpos.flat[2:],
22 | data.qvel.flat,
23 | data.cinert.flat,
24 | data.cvel.flat,
25 | data.qfrc_actuator.flat,
26 | data.cfrc_ext.flat,
27 | ]
28 | )
29 |
30 | def step(self, a):
31 | pos_before = mass_center(self.model, self.sim)
32 | self.do_simulation(a, self.frame_skip)
33 | pos_after = mass_center(self.model, self.sim)
34 | alive_bonus = 5.0
35 | data = self.sim.data
36 | lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt
37 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
38 | quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
39 | quad_impact_cost = min(quad_impact_cost, 10)
40 | reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
41 | qpos = self.sim.data.qpos
42 | done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
43 | return (
44 | self._get_obs(),
45 | reward,
46 | done,
47 | dict(
48 | reward_linvel=lin_vel_cost,
49 | reward_quadctrl=-quad_ctrl_cost,
50 | reward_alive=alive_bonus,
51 | reward_impact=-quad_impact_cost,
52 | ),
53 | )
54 |
55 | def reset_model(self):
56 | c = 0.01
57 | self.set_state(
58 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
59 | self.init_qvel
60 | + self.np_random.uniform(
61 | low=-c,
62 | high=c,
63 | size=self.model.nv,
64 | ),
65 | )
66 | return self._get_obs()
67 |
68 | def viewer_setup(self):
69 | self.viewer.cam.trackbodyid = 1
70 | self.viewer.cam.distance = self.model.stat.extent * 1.0
71 | self.viewer.cam.lookat[2] = 2.0
72 | self.viewer.cam.elevation = -20
73 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/humanoidstandup.py:
--------------------------------------------------------------------------------
1 | from gym.envs.mujoco import mujoco_env
2 | from gym import utils
3 | import numpy as np
4 |
5 |
6 | class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
7 | def __init__(self):
8 | mujoco_env.MujocoEnv.__init__(self, "humanoidstandup.xml", 5)
9 | utils.EzPickle.__init__(self)
10 |
11 | def _get_obs(self):
12 | data = self.sim.data
13 | return np.concatenate(
14 | [
15 | data.qpos.flat[2:],
16 | data.qvel.flat,
17 | data.cinert.flat,
18 | data.cvel.flat,
19 | data.qfrc_actuator.flat,
20 | data.cfrc_ext.flat,
21 | ]
22 | )
23 |
24 | def step(self, a):
25 | self.do_simulation(a, self.frame_skip)
26 | pos_after = self.sim.data.qpos[2]
27 | data = self.sim.data
28 | uph_cost = (pos_after - 0) / self.model.opt.timestep
29 |
30 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
31 | quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
32 | quad_impact_cost = min(quad_impact_cost, 10)
33 | reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
34 |
35 | done = bool(False)
36 | return (
37 | self._get_obs(),
38 | reward,
39 | done,
40 | dict(
41 | reward_linup=uph_cost,
42 | reward_quadctrl=-quad_ctrl_cost,
43 | reward_impact=-quad_impact_cost,
44 | ),
45 | )
46 |
47 | def reset_model(self):
48 | c = 0.01
49 | self.set_state(
50 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
51 | self.init_qvel
52 | + self.np_random.uniform(
53 | low=-c,
54 | high=c,
55 | size=self.model.nv,
56 | ),
57 | )
58 | return self._get_obs()
59 |
60 | def viewer_setup(self):
61 | self.viewer.cam.trackbodyid = 1
62 | self.viewer.cam.distance = self.model.stat.extent * 1.0
63 | self.viewer.cam.lookat[2] = 0.8925
64 | self.viewer.cam.elevation = -20
65 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/inverted_double_pendulum.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 |
6 | class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
7 | def __init__(self):
8 | mujoco_env.MujocoEnv.__init__(self, "inverted_double_pendulum.xml", 5)
9 | utils.EzPickle.__init__(self)
10 |
11 | def step(self, action):
12 | self.do_simulation(action, self.frame_skip)
13 | ob = self._get_obs()
14 | x, _, y = self.sim.data.site_xpos[0]
15 | dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
16 | v1, v2 = self.sim.data.qvel[1:3]
17 | vel_penalty = 1e-3 * v1 ** 2 + 5e-3 * v2 ** 2
18 | alive_bonus = 10
19 | r = alive_bonus - dist_penalty - vel_penalty
20 | done = bool(y <= 1)
21 | return ob, r, done, {}
22 |
23 | def _get_obs(self):
24 | return np.concatenate(
25 | [
26 | self.sim.data.qpos[:1], # cart x pos
27 | np.sin(self.sim.data.qpos[1:]), # link angles
28 | np.cos(self.sim.data.qpos[1:]),
29 | np.clip(self.sim.data.qvel, -10, 10),
30 | np.clip(self.sim.data.qfrc_constraint, -10, 10),
31 | ]
32 | ).ravel()
33 |
34 | def reset_model(self):
35 | self.set_state(
36 | self.init_qpos
37 | + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
38 | self.init_qvel + self.np_random.randn(self.model.nv) * 0.1,
39 | )
40 | return self._get_obs()
41 |
42 | def viewer_setup(self):
43 | v = self.viewer
44 | v.cam.trackbodyid = 0
45 | v.cam.distance = self.model.stat.extent * 0.5
46 | v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2]
47 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/inverted_pendulum.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 |
6 | class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
7 | def __init__(self):
8 | utils.EzPickle.__init__(self)
9 | mujoco_env.MujocoEnv.__init__(self, "inverted_pendulum.xml", 2)
10 |
11 | def step(self, a):
12 | reward = 1.0
13 | self.do_simulation(a, self.frame_skip)
14 | ob = self._get_obs()
15 | notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= 0.2)
16 | done = not notdone
17 | return ob, reward, done, {}
18 |
19 | def reset_model(self):
20 | qpos = self.init_qpos + self.np_random.uniform(
21 | size=self.model.nq, low=-0.01, high=0.01
22 | )
23 | qvel = self.init_qvel + self.np_random.uniform(
24 | size=self.model.nv, low=-0.01, high=0.01
25 | )
26 | self.set_state(qpos, qvel)
27 | return self._get_obs()
28 |
29 | def _get_obs(self):
30 | return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
31 |
32 | def viewer_setup(self):
33 | v = self.viewer
34 | v.cam.trackbodyid = 0
35 | v.cam.distance = self.model.stat.extent
36 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/pusher.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | import mujoco_py
6 |
7 |
8 | class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
9 | def __init__(self):
10 | utils.EzPickle.__init__(self)
11 | mujoco_env.MujocoEnv.__init__(self, "pusher.xml", 5)
12 |
13 | def step(self, a):
14 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
15 | vec_2 = self.get_body_com("object") - self.get_body_com("goal")
16 |
17 | reward_near = -np.linalg.norm(vec_1)
18 | reward_dist = -np.linalg.norm(vec_2)
19 | reward_ctrl = -np.square(a).sum()
20 | reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
21 |
22 | self.do_simulation(a, self.frame_skip)
23 | ob = self._get_obs()
24 | done = False
25 | return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
26 |
27 | def viewer_setup(self):
28 | self.viewer.cam.trackbodyid = -1
29 | self.viewer.cam.distance = 4.0
30 |
31 | def reset_model(self):
32 | qpos = self.init_qpos
33 |
34 | self.goal_pos = np.asarray([0, 0])
35 | while True:
36 | # self.cylinder_pos = np.concatenate(
37 | # [
38 | # self.np_random.uniform(low=-0.3, high=0, size=1),
39 | # self.np_random.uniform(low=-0.2, high=0.2, size=1),
40 | # ]
41 | # )
42 |             # Sample the cylinder start from the interpolated context distribution set via set_context_dist, plus small Gaussian noise
43 |             random_idx = self.np_random.choice(self.inter_context.shape[0])
44 |             self.cylinder_pos = self.inter_context[random_idx, :] + 0.01 * np.random.randn(2)
45 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
46 | break
47 |
48 | qpos[-4:-2] = self.cylinder_pos
49 | qpos[-2:] = self.goal_pos
50 | qvel = self.init_qvel + self.np_random.uniform(
51 | low=-0.005, high=0.005, size=self.model.nv
52 | )
53 | qvel[-4:] = 0
54 | self.set_state(qpos, qvel)
55 | return self._get_obs()
56 |
57 | def set_context_dist(self, context_dist):
58 | self.inter_context = context_dist
59 |
60 | def _get_obs(self):
61 | return np.concatenate(
62 | [
63 | self.sim.data.qpos.flat[:7],
64 | self.sim.data.qvel.flat[:7],
65 | self.get_body_com("tips_arm"),
66 | self.get_body_com("object"),
67 | self.get_body_com("goal"),
68 | ]
69 | )
70 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/striker.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 |
6 | class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
7 | def __init__(self):
8 | utils.EzPickle.__init__(self)
9 | self._striked = False
10 | self._min_strike_dist = np.inf
11 | self.strike_threshold = 0.1
12 | mujoco_env.MujocoEnv.__init__(self, "striker.xml", 5)
13 |
14 | def step(self, a):
15 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
16 | vec_2 = self.get_body_com("object") - self.get_body_com("goal")
17 | self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(vec_2))
18 |
19 | if np.linalg.norm(vec_1) < self.strike_threshold:
20 | self._striked = True
21 | self._strike_pos = self.get_body_com("tips_arm")
22 |
23 | if self._striked:
24 | vec_3 = self.get_body_com("object") - self._strike_pos
25 | reward_near = -np.linalg.norm(vec_3)
26 | else:
27 | reward_near = -np.linalg.norm(vec_1)
28 |
29 | reward_dist = -np.linalg.norm(self._min_strike_dist)
30 | reward_ctrl = -np.square(a).sum()
31 | reward = 3 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
32 |
33 | self.do_simulation(a, self.frame_skip)
34 | ob = self._get_obs()
35 | done = False
36 | return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
37 |
38 | def viewer_setup(self):
39 | self.viewer.cam.trackbodyid = 0
40 | self.viewer.cam.distance = 4.0
41 |
42 | def reset_model(self):
43 | self._min_strike_dist = np.inf
44 | self._striked = False
45 | self._strike_pos = None
46 |
47 | qpos = self.init_qpos
48 |
49 | self.ball = np.array([0.5, -0.175])
50 | while True:
51 | self.goal = np.concatenate(
52 | [
53 | self.np_random.uniform(low=0.15, high=0.7, size=1),
54 | self.np_random.uniform(low=0.1, high=1.0, size=1),
55 | ]
56 | )
57 | if np.linalg.norm(self.ball - self.goal) > 0.17:
58 | break
59 |
60 | qpos[-9:-7] = [self.ball[1], self.ball[0]]
61 | qpos[-7:-5] = self.goal
62 | diff = self.ball - self.goal
63 | angle = -np.arctan(diff[0] / (diff[1] + 1e-8))
64 | qpos[-1] = angle / 3.14
65 | qvel = self.init_qvel + self.np_random.uniform(
66 | low=-0.1, high=0.1, size=self.model.nv
67 | )
68 | qvel[7:] = 0
69 | self.set_state(qpos, qvel)
70 | return self._get_obs()
71 |
72 | def _get_obs(self):
73 | return np.concatenate(
74 | [
75 | self.sim.data.qpos.flat[:7],
76 | self.sim.data.qvel.flat[:7],
77 | self.get_body_com("tips_arm"),
78 | self.get_body_com("object"),
79 | self.get_body_com("goal"),
80 | ]
81 | )
82 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/swimmer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 |
6 | class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
7 | def __init__(self):
8 | mujoco_env.MujocoEnv.__init__(self, "swimmer.xml", 4)
9 | utils.EzPickle.__init__(self)
10 |
11 | def step(self, a):
12 | ctrl_cost_coeff = 0.0001
13 | xposbefore = self.sim.data.qpos[0]
14 | self.do_simulation(a, self.frame_skip)
15 | xposafter = self.sim.data.qpos[0]
16 | reward_fwd = (xposafter - xposbefore) / self.dt
17 | reward_ctrl = -ctrl_cost_coeff * np.square(a).sum()
18 | reward = reward_fwd + reward_ctrl
19 | ob = self._get_obs()
20 | return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
21 |
22 | def _get_obs(self):
23 | qpos = self.sim.data.qpos
24 | qvel = self.sim.data.qvel
25 | return np.concatenate([qpos.flat[2:], qvel.flat])
26 |
27 | def reset_model(self):
28 | self.set_state(
29 | self.init_qpos
30 | + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
31 | self.init_qvel
32 | + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nv),
33 | )
34 | return self._get_obs()
35 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/thrower.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 |
6 | class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
7 | def __init__(self):
8 | utils.EzPickle.__init__(self)
9 | self._ball_hit_ground = False
10 | self._ball_hit_location = None
11 | mujoco_env.MujocoEnv.__init__(self, "thrower.xml", 5)
12 |
13 | def step(self, a):
14 | ball_xy = self.get_body_com("ball")[:2]
15 | goal_xy = self.get_body_com("goal")[:2]
16 |
17 | if not self._ball_hit_ground and self.get_body_com("ball")[2] < -0.25:
18 | self._ball_hit_ground = True
19 | self._ball_hit_location = self.get_body_com("ball")
20 |
21 | if self._ball_hit_ground:
22 | ball_hit_xy = self._ball_hit_location[:2]
23 | reward_dist = -np.linalg.norm(ball_hit_xy - goal_xy)
24 | else:
25 | reward_dist = -np.linalg.norm(ball_xy - goal_xy)
26 | reward_ctrl = -np.square(a).sum()
27 |
28 | reward = reward_dist + 0.002 * reward_ctrl
29 | self.do_simulation(a, self.frame_skip)
30 | ob = self._get_obs()
31 | done = False
32 | return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
33 |
34 | def viewer_setup(self):
35 | self.viewer.cam.trackbodyid = 0
36 | self.viewer.cam.distance = 4.0
37 |
38 | def reset_model(self):
39 | self._ball_hit_ground = False
40 | self._ball_hit_location = None
41 |
42 | qpos = self.init_qpos
43 | self.goal = np.array(
44 | [
45 | self.np_random.uniform(low=-0.3, high=0.3),
46 | self.np_random.uniform(low=-0.3, high=0.3),
47 | ]
48 | )
49 |
50 | qpos[-9:-7] = self.goal
51 | qvel = self.init_qvel + self.np_random.uniform(
52 | low=-0.005, high=0.005, size=self.model.nv
53 | )
54 | qvel[7:] = 0
55 | self.set_state(qpos, qvel)
56 | return self._get_obs()
57 |
58 | def _get_obs(self):
59 | return np.concatenate(
60 | [
61 | self.sim.data.qpos.flat[:7],
62 | self.sim.data.qvel.flat[:7],
63 | self.get_body_com("r_wrist_roll_link"),
64 | self.get_body_com("ball"),
65 | self.get_body_com("goal"),
66 | ]
67 | )
68 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/mujoco/walker2d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 |
6 | class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
7 | def __init__(self):
8 | mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4)
9 | utils.EzPickle.__init__(self)
10 |
11 | def step(self, a):
12 | posbefore = self.sim.data.qpos[0]
13 | self.do_simulation(a, self.frame_skip)
14 | posafter, height, ang = self.sim.data.qpos[0:3]
15 | alive_bonus = 1.0
16 | reward = (posafter - posbefore) / self.dt
17 | reward += alive_bonus
18 | reward -= 1e-3 * np.square(a).sum()
19 | done = not (height > 0.8 and height < 2.0 and ang > -1.0 and ang < 1.0)
20 | ob = self._get_obs()
21 | return ob, reward, done, {}
22 |
23 | def _get_obs(self):
24 | qpos = self.sim.data.qpos
25 | qvel = self.sim.data.qvel
26 | return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()
27 |
28 | def reset_model(self):
29 | self.set_state(
30 | self.init_qpos
31 | + self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nq),
32 | self.init_qvel
33 | + self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nv),
34 | )
35 | return self._get_obs()
36 |
37 | def viewer_setup(self):
38 | self.viewer.cam.trackbodyid = 2
39 | self.viewer.cam.distance = self.model.stat.extent * 0.5
40 | self.viewer.cam.lookat[2] = 1.15
41 | self.viewer.cam.elevation = -20
42 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/README.md:
--------------------------------------------------------------------------------
1 | # Robotics environments
2 |
3 | Details and documentation on these robotics environments are available in our [blog post](https://blog.openai.com/ingredients-for-robotics-research/), the accompanying [technical report](https://arxiv.org/abs/1802.09464), and the [Gym website](https://gym.openai.com/envs/#robotics).
4 |
5 | If you use these environments, please cite the following paper:
6 |
7 | ```
8 | @misc{1802.09464,
9 | Author = {Matthias Plappert and Marcin Andrychowicz and Alex Ray and Bob McGrew and Bowen Baker and Glenn Powell and Jonas Schneider and Josh Tobin and Maciek Chociej and Peter Welinder and Vikash Kumar and Wojciech Zaremba},
10 | Title = {Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research},
11 | Year = {2018},
12 | Eprint = {arXiv:1802.09464},
13 | }
14 | ```
15 |
16 | ## Fetch environments
17 |
18 |
19 | [FetchReach-v0](https://gym.openai.com/envs/FetchReach-v0/): Fetch has to move its end-effector to the desired goal position.
20 |
21 |
22 |
23 |
24 | [FetchSlide-v0](https://gym.openai.com/envs/FetchSlide-v0/): Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
25 |
26 |
27 |
28 |
29 | [FetchPush-v0](https://gym.openai.com/envs/FetchPush-v0/): Fetch has to move a box by pushing it until it reaches a desired goal position.
30 |
31 |
32 |
33 |
34 | [FetchPickAndPlace-v0](https://gym.openai.com/envs/FetchPickAndPlace-v0/): Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
35 |
36 | ## Shadow Dexterous Hand environments
37 |
38 |
39 | [HandReach-v0](https://gym.openai.com/envs/HandReach-v0/): ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
40 |
41 |
42 |
43 |
44 | [HandManipulateBlock-v0](https://gym.openai.com/envs/HandManipulateBlock-v0/): ShadowHand has to manipulate a block until it achieves a desired goal position and rotation.
45 |
46 |
47 |
48 |
49 | [HandManipulateEgg-v0](https://gym.openai.com/envs/HandManipulateEgg-v0/): ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation.
50 |
51 |
52 |
53 |
54 | [HandManipulatePen-v0](https://gym.openai.com/envs/HandManipulatePen-v0/): ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation.
55 |
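All of these environments are goal-based: `reset` returns a dictionary observation with `observation`, `achieved_goal`, and `desired_goal` keys, and the sparse reward can be recomputed from any goal pair via `compute_reward`. A minimal sketch; the registered id may differ between gym releases (e.g. `FetchReach-v1` rather than `FetchReach-v0`), so treat the id below as an assumption:

```python
import gym

env = gym.make("FetchReach-v1")  # id assumed; check the registry for this gym version
obs = env.reset()
print(obs["observation"].shape, obs["achieved_goal"], obs["desired_goal"])

obs, reward, done, info = env.step(env.action_space.sample())
# Recomputing the reward from (achieved, desired) goal pairs is what
# hindsight experience replay relies on.
recomputed = env.compute_reward(obs["achieved_goal"], obs["desired_goal"], info)
```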
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.robotics.fetch_env import FetchEnv
2 | from gym.envs.robotics.fetch.slide import FetchSlideEnv
3 | from gym.envs.robotics.fetch.pick_and_place import FetchPickAndPlaceEnv
4 | from gym.envs.robotics.fetch.push import FetchPushEnv
5 | from gym.envs.robotics.fetch.reach import FetchReachEnv
6 |
7 | from gym.envs.robotics.hand.reach import HandReachEnv
8 | from gym.envs.robotics.hand.manipulate import HandBlockEnv
9 | from gym.envs.robotics.hand.manipulate import HandEggEnv
10 | from gym.envs.robotics.hand.manipulate import HandPenEnv
11 |
12 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandBlockTouchSensorsEnv
13 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandEggTouchSensorsEnv
14 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandPenTouchSensorsEnv
15 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/fetch/pick_and_place.xml:
--------------------------------------------------------------------------------
[MuJoCo XML model file; markup not preserved in this dump]
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/fetch/push.xml:
--------------------------------------------------------------------------------
[MuJoCo XML model file; markup not preserved in this dump]
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/fetch/reach.xml:
--------------------------------------------------------------------------------
[MuJoCo XML model file; markup not preserved in this dump]
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/fetch/slide.xml:
--------------------------------------------------------------------------------
[MuJoCo XML model file; markup not preserved in this dump]
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/hand/manipulate_block.xml:
--------------------------------------------------------------------------------
[MuJoCo XML model file; markup not preserved in this dump]
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml:
--------------------------------------------------------------------------------
[MuJoCo XML model file; markup not preserved in this dump]
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/hand/manipulate_egg.xml:
--------------------------------------------------------------------------------
[MuJoCo XML model file; markup not preserved in this dump]
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/hand/manipulate_egg_touch_sensors.xml:
--------------------------------------------------------------------------------
[MuJoCo XML model file; markup not preserved in this dump]
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/hand/manipulate_pen.xml:
--------------------------------------------------------------------------------
[MuJoCo XML model file; markup not preserved in this dump]
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/hand/manipulate_pen_touch_sensors.xml:
--------------------------------------------------------------------------------
[MuJoCo XML model file; markup not preserved in this dump]
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/hand/reach.xml:
--------------------------------------------------------------------------------
[MuJoCo XML model file; markup not preserved in this dump]
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/hand/shared_asset.xml:
--------------------------------------------------------------------------------
[MuJoCo XML model file; markup not preserved in this dump]
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/.get:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/.get
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/estop_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/estop_link.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/gripper_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/gripper_link.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/laser_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/laser_link.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/hand/F1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/F1.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/hand/F2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/F2.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/hand/F3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/F3.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/hand/TH1_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/TH1_z.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/hand/TH2_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/TH2_z.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/hand/TH3_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/TH3_z.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/hand/forearm_electric.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/forearm_electric.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/hand/knuckle.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/knuckle.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/hand/palm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/palm.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/stls/hand/wrist.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/stls/hand/wrist.stl
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/textures/block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/textures/block.png
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/assets/textures/block_hidden.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/assets/textures/block_hidden.png
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/fetch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/fetch/__init__.py
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/fetch/pick_and_place.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join("fetch", "pick_and_place.xml")
8 |
9 |
10 | class FetchPickAndPlaceEnv(fetch_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type="sparse"):
12 | initial_qpos = {
13 | "robot0:slide0": 0.405,
14 | "robot0:slide1": 0.48,
15 | "robot0:slide2": 0.0,
16 | "object0:joint": [1.25, 0.53, 0.4, 1.0, 0.0, 0.0, 0.0],
17 | }
18 | fetch_env.FetchEnv.__init__(
19 | self,
20 | MODEL_XML_PATH,
21 | has_object=True,
22 | block_gripper=False,
23 | n_substeps=20,
24 | gripper_extra_height=0.2,
25 | target_in_the_air=True,
26 | target_offset=0.0,
27 | obj_range=0.15,
28 | target_range=0.15,
29 | distance_threshold=0.05,
30 | initial_qpos=initial_qpos,
31 | reward_type=reward_type,
32 | )
33 | utils.EzPickle.__init__(self, reward_type=reward_type)
34 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/fetch/push.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join("fetch", "push.xml")
8 |
9 |
10 | class FetchPushEnv(fetch_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type="sparse"):
12 | initial_qpos = {
13 | "robot0:slide0": 0.05,
14 | "robot0:slide1": 0.48,
15 | "robot0:slide2": 0.0,
16 | "object0:joint": [1.7, 1.1, 0.41, 1.0, 0.0, 0.0, 0.0],
17 | }
18 | fetch_env.FetchEnv.__init__(
19 | self,
20 | MODEL_XML_PATH,
21 | has_object=True,
22 | block_gripper=True,
23 | n_substeps=20,
24 | gripper_extra_height=0.0,
25 | target_in_the_air=False,
26 | target_offset=0.0,
27 | obj_range=0.15,
28 | target_range=0.15,
29 | distance_threshold=0.15,
30 | initial_qpos=initial_qpos,
31 | reward_type=reward_type,
32 | )
33 | utils.EzPickle.__init__(self, reward_type=reward_type)
34 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/fetch/reach.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join("fetch", "reach.xml")
8 |
9 |
10 | class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type="sparse"):
12 | initial_qpos = {
13 | "robot0:slide0": 0.4049,
14 | "robot0:slide1": 0.48,
15 | "robot0:slide2": 0.0,
16 | }
17 | fetch_env.FetchEnv.__init__(
18 | self,
19 | MODEL_XML_PATH,
20 | has_object=False,
21 | block_gripper=True,
22 | n_substeps=20,
23 | gripper_extra_height=0.2,
24 | target_in_the_air=True,
25 | target_offset=0.0,
26 | obj_range=0.15,
27 | target_range=0.15,
28 | distance_threshold=0.05,
29 | initial_qpos=initial_qpos,
30 | reward_type=reward_type,
31 | )
32 | utils.EzPickle.__init__(self, reward_type=reward_type)
33 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/fetch/slide.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 |
4 | from gym import utils
5 | from gym.envs.robotics import fetch_env
6 |
7 |
8 | # Ensure we get the path separator correct on windows
9 | MODEL_XML_PATH = os.path.join("fetch", "slide.xml")
10 |
11 |
12 | class FetchSlideEnv(fetch_env.FetchEnv, utils.EzPickle):
13 | def __init__(self, reward_type="sparse"):
14 | initial_qpos = {
15 | "robot0:slide0": 0.05,
16 | "robot0:slide1": 0.48,
17 | "robot0:slide2": 0.0,
18 | "object0:joint": [1.7, 1.1, 0.41, 1.0, 0.0, 0.0, 0.0],
19 | }
20 | fetch_env.FetchEnv.__init__(
21 | self,
22 | MODEL_XML_PATH,
23 | has_object=True,
24 | block_gripper=True,
25 | n_substeps=20,
26 | gripper_extra_height=-0.02,
27 | target_in_the_air=False,
28 | target_offset=np.array([0.4, 0.0, 0.0]),
29 | obj_range=0.1,
30 | target_range=0.3,
31 | distance_threshold=0.05,
32 | initial_qpos=initial_qpos,
33 | reward_type=reward_type,
34 | )
35 | utils.EzPickle.__init__(self, reward_type=reward_type)
36 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/hand/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/envs/robotics/hand/__init__.py
--------------------------------------------------------------------------------
/envs/gym/gym/envs/robotics/hand_env.py:
--------------------------------------------------------------------------------
1 | import os
2 | import copy
3 | import numpy as np
4 |
5 | import gym
6 | from gym import error, spaces
7 | from gym.utils import seeding
8 | from gym.envs.robotics import robot_env
9 |
10 |
11 | class HandEnv(robot_env.RobotEnv):
12 | def __init__(self, model_path, n_substeps, initial_qpos, relative_control):
13 | self.relative_control = relative_control
14 |
15 | super(HandEnv, self).__init__(
16 | model_path=model_path,
17 | n_substeps=n_substeps,
18 | n_actions=20,
19 | initial_qpos=initial_qpos,
20 | )
21 |
22 | # RobotEnv methods
23 | # ----------------------------
24 |
25 | def _set_action(self, action):
26 | assert action.shape == (20,)
27 |
28 | ctrlrange = self.sim.model.actuator_ctrlrange
29 | actuation_range = (ctrlrange[:, 1] - ctrlrange[:, 0]) / 2.0
30 | if self.relative_control:
31 | actuation_center = np.zeros_like(action)
32 | for i in range(self.sim.data.ctrl.shape[0]):
33 | actuation_center[i] = self.sim.data.get_joint_qpos(
34 | self.sim.model.actuator_names[i].replace(":A_", ":")
35 | )
36 | for joint_name in ["FF", "MF", "RF", "LF"]:
37 | act_idx = self.sim.model.actuator_name2id(
38 | "robot0:A_{}J1".format(joint_name)
39 | )
40 | actuation_center[act_idx] += self.sim.data.get_joint_qpos(
41 | "robot0:{}J0".format(joint_name)
42 | )
43 | else:
44 | actuation_center = (ctrlrange[:, 1] + ctrlrange[:, 0]) / 2.0
45 | self.sim.data.ctrl[:] = actuation_center + action * actuation_range
46 | self.sim.data.ctrl[:] = np.clip(
47 | self.sim.data.ctrl, ctrlrange[:, 0], ctrlrange[:, 1]
48 | )
49 |
50 | def _viewer_setup(self):
51 | body_id = self.sim.model.body_name2id("robot0:palm")
52 | lookat = self.sim.data.body_xpos[body_id]
53 | for idx, value in enumerate(lookat):
54 | self.viewer.cam.lookat[idx] = value
55 | self.viewer.cam.distance = 0.5
56 | self.viewer.cam.azimuth = 55.0
57 | self.viewer.cam.elevation = -25.0
58 |
59 | def render(self, mode="human", width=500, height=500):
60 | return super(HandEnv, self).render(mode, width, height)
61 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/toy_text/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.toy_text.blackjack import BlackjackEnv
2 | from gym.envs.toy_text.frozen_lake import FrozenLakeEnv
3 | from gym.envs.toy_text.cliffwalking import CliffWalkingEnv
4 | from gym.envs.toy_text.taxi import TaxiEnv
5 |
--------------------------------------------------------------------------------
/envs/gym/gym/envs/toy_text/discrete.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym import Env, spaces
4 | from gym.utils import seeding
5 |
6 |
7 | def categorical_sample(prob_n, np_random):
8 | """
9 | Sample from categorical distribution
10 | Each row specifies class probabilities
11 | """
12 | prob_n = np.asarray(prob_n)
13 | csprob_n = np.cumsum(prob_n)
14 | return (csprob_n > np_random.rand()).argmax()
15 |
16 |
17 | class DiscreteEnv(Env):
18 |
19 | """
20 | Has the following members
21 | - nS: number of states
22 | - nA: number of actions
23 | - P: transitions (*)
24 | - isd: initial state distribution (**)
25 |
26 | (*) dictionary of lists, where
27 | P[s][a] == [(probability, nextstate, reward, done), ...]
28 | (**) list or array of length nS
29 |
30 |
31 | """
32 |
33 | def __init__(self, nS, nA, P, isd):
34 | self.P = P
35 | self.isd = isd
36 | self.lastaction = None # for rendering
37 | self.nS = nS
38 | self.nA = nA
39 |
40 | self.action_space = spaces.Discrete(self.nA)
41 | self.observation_space = spaces.Discrete(self.nS)
42 |
43 | self.seed()
44 | self.s = categorical_sample(self.isd, self.np_random)
45 |
46 | def seed(self, seed=None):
47 | self.np_random, seed = seeding.np_random(seed)
48 | return [seed]
49 |
50 | def reset(self):
51 | self.s = categorical_sample(self.isd, self.np_random)
52 | self.lastaction = None
53 | return int(self.s)
54 |
55 | def step(self, a):
56 | transitions = self.P[self.s][a]
57 | i = categorical_sample([t[0] for t in transitions], self.np_random)
58 | p, s, r, d = transitions[i]
59 | self.s = s
60 | self.lastaction = a
61 | return (int(s), r, d, {"prob": p})
62 |
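The transition table described in the docstring is easiest to see with a concrete subclass. A minimal sketch; the class name and dynamics are made up for illustration:

```python
import numpy as np
from gym.envs.toy_text.discrete import DiscreteEnv


class TwoStateEnv(DiscreteEnv):
    """Hypothetical 2-state, 2-action chain: action 1 moves to the terminal state."""

    def __init__(self):
        nS, nA = 2, 2
        # P[s][a] == [(probability, nextstate, reward, done), ...]
        P = {
            0: {0: [(1.0, 0, 0.0, False)], 1: [(1.0, 1, 1.0, True)]},
            1: {0: [(1.0, 1, 0.0, True)], 1: [(1.0, 1, 0.0, True)]},
        }
        isd = np.array([1.0, 0.0])  # always start in state 0
        super(TwoStateEnv, self).__init__(nS, nA, P, isd)


env = TwoStateEnv()
state = env.reset()
state, reward, done, info = env.step(1)  # -> (1, 1.0, True, {"prob": 1.0})
```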
--------------------------------------------------------------------------------
/envs/gym/gym/envs/unittest/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.unittest.cube_crash import CubeCrash
2 | from gym.envs.unittest.cube_crash import CubeCrashSparse
3 | from gym.envs.unittest.cube_crash import CubeCrashScreenBecomesBlack
4 | from gym.envs.unittest.memorize_digits import MemorizeDigits
5 |
--------------------------------------------------------------------------------
/envs/gym/gym/logger.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from gym.utils import colorize
4 |
5 | DEBUG = 10
6 | INFO = 20
7 | WARN = 30
8 | ERROR = 40
9 | DISABLED = 50
10 |
11 | MIN_LEVEL = 30
12 |
13 |
14 | def set_level(level):
15 | """
16 | Set logging threshold on current logger.
17 | """
18 | global MIN_LEVEL
19 | MIN_LEVEL = level
20 |
21 |
22 | def debug(msg, *args):
23 | if MIN_LEVEL <= DEBUG:
24 | print("%s: %s" % ("DEBUG", msg % args))
25 |
26 |
27 | def info(msg, *args):
28 | if MIN_LEVEL <= INFO:
29 | print("%s: %s" % ("INFO", msg % args))
30 |
31 |
32 | def warn(msg, *args):
33 | if MIN_LEVEL <= WARN:
34 | warnings.warn(colorize("%s: %s" % ("WARN", msg % args), "yellow"))
35 |
36 |
37 | def error(msg, *args):
38 | if MIN_LEVEL <= ERROR:
39 | print(colorize("%s: %s" % ("ERROR", msg % args), "red"))
40 |
41 |
42 | # DEPRECATED:
43 | setLevel = set_level
44 |
--------------------------------------------------------------------------------
/envs/gym/gym/spaces/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.spaces.space import Space
2 | from gym.spaces.box import Box
3 | from gym.spaces.discrete import Discrete
4 | from gym.spaces.multi_discrete import MultiDiscrete
5 | from gym.spaces.multi_binary import MultiBinary
6 | from gym.spaces.tuple import Tuple
7 | from gym.spaces.dict import Dict
8 |
9 | from gym.spaces.utils import flatdim
10 | from gym.spaces.utils import flatten_space
11 | from gym.spaces.utils import flatten
12 | from gym.spaces.utils import unflatten
13 |
14 | __all__ = [
15 | "Space",
16 | "Box",
17 | "Discrete",
18 | "MultiDiscrete",
19 | "MultiBinary",
20 | "Tuple",
21 | "Dict",
22 | "flatdim",
23 | "flatten_space",
24 | "flatten",
25 | "unflatten",
26 | ]
27 |
--------------------------------------------------------------------------------
/envs/gym/gym/spaces/discrete.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .space import Space
3 |
4 |
5 | class Discrete(Space):
6 | r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`.
7 |
8 | Example::
9 |
10 | >>> Discrete(2)
11 |
12 | """
13 |
14 | def __init__(self, n, seed=None):
15 | assert n >= 0
16 | self.n = n
17 | super(Discrete, self).__init__((), np.int64, seed)
18 |
19 | def sample(self):
20 | return self.np_random.randint(self.n)
21 |
22 | def contains(self, x):
23 | if isinstance(x, int):
24 | as_int = x
25 | elif isinstance(x, (np.generic, np.ndarray)) and (
26 | x.dtype.char in np.typecodes["AllInteger"] and x.shape == ()
27 | ):
28 | as_int = int(x)
29 | else:
30 | return False
31 | return as_int >= 0 and as_int < self.n
32 |
33 | def __repr__(self):
34 | return "Discrete(%d)" % self.n
35 |
36 | def __eq__(self, other):
37 | return isinstance(other, Discrete) and self.n == other.n
38 |
--------------------------------------------------------------------------------
/envs/gym/gym/spaces/multi_binary.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .space import Space
3 |
4 |
5 | class MultiBinary(Space):
6 | """
7 | An n-shape binary space.
8 |
9 | The argument to MultiBinary defines n, which could be a number or a `list` of numbers.
10 |
11 | Example Usage:
12 |
13 | >> self.observation_space = spaces.MultiBinary(5)
14 |
15 | >> self.observation_space.sample()
16 |
17 | array([0, 1, 0, 1, 0], dtype=int8)
18 |
19 | >> self.observation_space = spaces.MultiBinary([3,2])
20 |
21 | >> self.observation_space.sample()
22 |
23 | array([[0, 0],
24 | [0, 1],
25 | [1, 1]], dtype=int8)
26 |
27 | """
28 |
29 | def __init__(self, n, seed=None):
30 | self.n = n
31 | if type(n) in [tuple, list, np.ndarray]:
32 | input_n = n
33 | else:
34 | input_n = (n,)
35 | super(MultiBinary, self).__init__(input_n, np.int8, seed)
36 |
37 | def sample(self):
38 | return self.np_random.randint(low=0, high=2, size=self.n, dtype=self.dtype)
39 |
40 | def contains(self, x):
41 | if isinstance(x, list) or isinstance(x, tuple):
42 | x = np.array(x) # Promote list to array for contains check
43 | if self.shape != x.shape:
44 | return False
45 | return ((x == 0) | (x == 1)).all()
46 |
47 | def to_jsonable(self, sample_n):
48 | return np.array(sample_n).tolist()
49 |
50 | def from_jsonable(self, sample_n):
51 | return [np.asarray(sample) for sample in sample_n]
52 |
53 | def __repr__(self):
54 | return "MultiBinary({})".format(self.n)
55 |
56 | def __eq__(self, other):
57 | return isinstance(other, MultiBinary) and self.n == other.n
58 |
--------------------------------------------------------------------------------
/envs/gym/gym/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """A set of common utilities used within the environments. These are
2 | not intended as API functions, and will not remain stable over time.
3 | """
4 |
5 | # These submodules should not have any import-time dependencies.
6 | # We want this since we use `utils` during our import-time sanity checks
7 | # that verify that our dependencies are actually present.
8 | from .colorize import colorize
9 | from .ezpickle import EzPickle
10 |
--------------------------------------------------------------------------------
/envs/gym/gym/utils/atomic_write.py:
--------------------------------------------------------------------------------
1 | # Based on http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python
2 |
3 | import os
4 | from contextlib import contextmanager
5 |
6 | # We would ideally atomically replace any existing file with the new
7 | # version. However, on Windows there's no Python-only solution prior
8 | # to Python 3.3. (This library includes a C extension to do so:
9 | # https://pypi.python.org/pypi/pyosreplace/0.1.)
10 | #
11 | # Correspondingly, we make a best effort, but on Python < 3.3 use a
12 | # replace method which could result in the file temporarily
13 | # disappearing.
14 | import sys
15 |
16 | if sys.version_info >= (3, 3):
17 | # Python 3.3 and up have a native `replace` method
18 | from os import replace
19 | elif sys.platform.startswith("win"):
20 |
21 | def replace(src, dst):
22 | # TODO: on Windows, this will raise if the file is in use,
23 | # which is possible. We'll need to make this more robust over
24 | # time.
25 | try:
26 | os.remove(dst)
27 | except OSError:
28 | pass
29 | os.rename(src, dst)
30 |
31 |
32 | else:
33 | # POSIX rename() is always atomic
34 | from os import rename as replace
35 |
36 |
37 | @contextmanager
38 | def atomic_write(filepath, binary=False, fsync=False):
39 | """Writeable file object that atomically updates a file (using a temporary file). In some cases (namely Python < 3.3 on Windows), this could result in an existing file being temporarily unlinked.
40 |
41 | :param filepath: the file path to be opened
42 | :param binary: whether to open the file in a binary mode instead of textual
43 | :param fsync: whether to force write the file to disk
44 | """
45 |
46 | tmppath = filepath + "~"
47 | while os.path.isfile(tmppath):
48 | tmppath += "~"
49 | try:
50 | with open(tmppath, "wb" if binary else "w") as file:
51 | yield file
52 | if fsync:
53 | file.flush()
54 | os.fsync(file.fileno())
55 | replace(tmppath, filepath)
56 | finally:
57 | try:
58 | os.remove(tmppath)
59 | except (IOError, OSError):
60 | pass
61 |
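A short usage sketch of the context manager above (the file path and contents are made up): the data only appears at its final path once the block completes, so readers never observe a partially written file.

```python
from gym.utils.atomic_write import atomic_write

# Hypothetical example: write a small text payload atomically.
with atomic_write("/tmp/results.json") as f:
    f.write('{"episodes": 100}')
```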
--------------------------------------------------------------------------------
/envs/gym/gym/utils/closer.py:
--------------------------------------------------------------------------------
1 | import atexit
2 | import threading
3 | import weakref
4 |
5 |
6 | class Closer(object):
7 | """A registry that ensures your objects get closed, whether manually,
8 | upon garbage collection, or upon exit. To work properly, your
9 | objects need to cooperate and do something like the following:
10 |
11 | ```
12 | closer = Closer()
13 | class Example(object):
14 | def __init__(self):
15 | self._id = closer.register(self)
16 |
17 | def close(self):
18 | # Probably worth making idempotent too!
19 | ...
20 | closer.unregister(self._id)
21 |
22 | def __del__(self):
23 | self.close()
24 | ```
25 |
26 | That is, your objects should:
27 |
28 | - register() themselves and save the returned ID
29 | - unregister() themselves upon close()
30 | - include a __del__ method which close()'s the object
31 | """
32 |
33 | def __init__(self, atexit_register=True):
34 | self.lock = threading.Lock()
35 | self.next_id = -1
36 | self.closeables = weakref.WeakValueDictionary()
37 |
38 | if atexit_register:
39 | atexit.register(self.close)
40 |
41 | def generate_next_id(self):
42 | with self.lock:
43 | self.next_id += 1
44 | return self.next_id
45 |
46 | def register(self, closeable):
47 | """Registers an object with a 'close' method.
48 |
49 | Returns:
50 | int: The registration ID of this object. It is the caller's responsibility to save this ID if early closing is desired.
51 | """
52 | assert hasattr(closeable, "close"), "No close method for {}".format(closeable)
53 |
54 | next_id = self.generate_next_id()
55 | self.closeables[next_id] = closeable
56 | return next_id
57 |
58 | def unregister(self, id):
59 | assert id is not None
60 | if id in self.closeables:
61 | del self.closeables[id]
62 |
63 | def close(self):
64 | # Explicitly fetch all monitors first so that they can't disappear while
65 | # we iterate. cf. http://stackoverflow.com/a/12429620
66 | closeables = list(self.closeables.values())
67 | for closeable in closeables:
68 | closeable.close()
69 |
--------------------------------------------------------------------------------
/envs/gym/gym/utils/colorize.py:
--------------------------------------------------------------------------------
1 | """A set of common utilities used within the environments. These are
2 | not intended as API functions, and will not remain stable over time.
3 | """
4 |
5 | color2num = dict(
6 | gray=30,
7 | red=31,
8 | green=32,
9 | yellow=33,
10 | blue=34,
11 | magenta=35,
12 | cyan=36,
13 | white=37,
14 | crimson=38,
15 | )
16 |
17 |
18 | def colorize(string, color, bold=False, highlight=False):
19 | """Return string surrounded by appropriate terminal color codes to
20 | print colorized text. Valid colors: gray, red, green, yellow,
21 | blue, magenta, cyan, white, crimson
22 | """
23 |
24 | attr = []
25 | num = color2num[color]
26 | if highlight:
27 | num += 10
28 | attr.append(str(num))
29 | if bold:
30 | attr.append("1")
31 | attrs = ";".join(attr)
32 | return "\x1b[%sm%s\x1b[0m" % (attrs, string)
33 |
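For illustration, `colorize` is simply wrapped around a message before printing; the messages below are made up:

```python
from gym.utils.colorize import colorize

print(colorize("WARN: observation clipped", "yellow", bold=True))
print(colorize("ERROR: simulation diverged", "red"))
```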
--------------------------------------------------------------------------------
/envs/gym/gym/utils/ezpickle.py:
--------------------------------------------------------------------------------
1 | class EzPickle(object):
2 | """Objects that are pickled and unpickled via their constructor
3 | arguments.
4 |
5 | Example usage:
6 |
7 | class Dog(Animal, EzPickle):
8 | def __init__(self, furcolor, tailkind="bushy"):
9 | Animal.__init__()
10 | EzPickle.__init__(furcolor, tailkind)
11 | ...
12 |
13 | When this object is unpickled, a new Dog will be constructed by passing the provided
14 | furcolor and tailkind into the constructor. However, philosophers are still not sure
15 | whether it is still the same dog.
16 |
17 | This is generally needed only for environments which wrap C/C++ code, such as MuJoCo
18 | and Atari.
19 | """
20 |
21 | def __init__(self, *args, **kwargs):
22 | self._ezpickle_args = args
23 | self._ezpickle_kwargs = kwargs
24 |
25 | def __getstate__(self):
26 | return {
27 | "_ezpickle_args": self._ezpickle_args,
28 | "_ezpickle_kwargs": self._ezpickle_kwargs,
29 | }
30 |
31 | def __setstate__(self, d):
32 | out = type(self)(*d["_ezpickle_args"], **d["_ezpickle_kwargs"])
33 | self.__dict__.update(out.__dict__)
34 |
--------------------------------------------------------------------------------
/envs/gym/gym/utils/json_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def json_encode_np(obj):
5 | """
6 | JSON can't serialize numpy types, so convert to pure python
7 | """
8 | if isinstance(obj, np.ndarray):
9 | return list(obj)
10 | elif isinstance(obj, np.float32):
11 | return float(obj)
12 | elif isinstance(obj, np.float64):
13 | return float(obj)
14 | elif isinstance(obj, np.int8):
15 | return int(obj)
16 | elif isinstance(obj, np.int16):
17 | return int(obj)
18 | elif isinstance(obj, np.int32):
19 | return int(obj)
20 | elif isinstance(obj, np.int64):
21 | return int(obj)
22 | else:
23 | return obj
24 |
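`json_encode_np` is typically passed as the `default` hook of `json.dumps`, which calls it whenever it encounters a value it cannot serialize. A small sketch with a made-up payload:

```python
import json

import numpy as np
from gym.utils.json_utils import json_encode_np

payload = {"reward": np.float32(1.5), "steps": np.int64(200), "obs": np.zeros(3)}
print(json.dumps(payload, default=json_encode_np))
# {"reward": 1.5, "steps": 200, "obs": [0.0, 0.0, 0.0]}
```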
--------------------------------------------------------------------------------
/envs/gym/gym/vector/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 | from collections.abc import Iterable
3 | except ImportError:
4 | Iterable = (tuple, list)
5 |
6 | from gym.vector.async_vector_env import AsyncVectorEnv
7 | from gym.vector.sync_vector_env import SyncVectorEnv
8 | from gym.vector.vector_env import VectorEnv, VectorEnvWrapper
9 |
10 | __all__ = ["AsyncVectorEnv", "SyncVectorEnv", "VectorEnv", "VectorEnvWrapper", "make"]
11 |
12 |
13 | def make(id, num_envs=1, asynchronous=True, wrappers=None, **kwargs):
14 | """Create a vectorized environment from multiple copies of an environment,
15 | from its id
16 |
17 | Parameters
18 | ----------
19 | id : str
20 | The environment ID. This must be a valid ID from the registry.
21 |
22 | num_envs : int
23 | Number of copies of the environment.
24 |
25 | asynchronous : bool (default: `True`)
26 | If `True`, wraps the environments in an `AsyncVectorEnv` (which uses
27 | `multiprocessing` to run the environments in parallel). If `False`,
28 | wraps the environments in a `SyncVectorEnv`.
29 |
30 | wrappers : Callable or Iterable of Callables (default: `None`)
31 | If not `None`, then apply the wrappers to each internal
32 | environment during creation.
33 |
34 | Returns
35 | -------
36 | env : `gym.vector.VectorEnv` instance
37 | The vectorized environment.
38 |
39 | Example
40 | -------
41 | >>> import gym
42 | >>> env = gym.vector.make('CartPole-v1', 3)
43 | >>> env.reset()
44 | array([[-0.04456399, 0.04653909, 0.01326909, -0.02099827],
45 | [ 0.03073904, 0.00145001, -0.03088818, -0.03131252],
46 | [ 0.03468829, 0.01500225, 0.01230312, 0.01825218]],
47 | dtype=float32)
48 | """
49 | from gym.envs import make as make_
50 |
51 | def _make_env():
52 | env = make_(id, **kwargs)
53 | if wrappers is not None:
54 | if callable(wrappers):
55 | env = wrappers(env)
56 | elif isinstance(wrappers, Iterable) and all(
57 | [callable(w) for w in wrappers]
58 | ):
59 | for wrapper in wrappers:
60 | env = wrapper(env)
61 | else:
62 | raise NotImplementedError
63 | return env
64 |
65 | env_fns = [_make_env for _ in range(num_envs)]
66 | return AsyncVectorEnv(env_fns) if asynchronous else SyncVectorEnv(env_fns)
67 |
--------------------------------------------------------------------------------
/envs/gym/gym/vector/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.vector.utils.misc import CloudpickleWrapper, clear_mpi_env_vars
2 | from gym.vector.utils.numpy_utils import concatenate, create_empty_array
3 | from gym.vector.utils.shared_memory import (
4 | create_shared_memory,
5 | read_from_shared_memory,
6 | write_to_shared_memory,
7 | )
8 | from gym.vector.utils.spaces import _BaseGymSpaces, batch_space
9 |
10 | __all__ = [
11 | "CloudpickleWrapper",
12 | "clear_mpi_env_vars",
13 | "concatenate",
14 | "create_empty_array",
15 | "create_shared_memory",
16 | "read_from_shared_memory",
17 | "write_to_shared_memory",
18 | "_BaseGymSpaces",
19 | "batch_space",
20 | ]
21 |
--------------------------------------------------------------------------------
/envs/gym/gym/vector/utils/misc.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import os
3 |
4 | __all__ = ["CloudpickleWrapper", "clear_mpi_env_vars"]
5 |
6 |
7 | class CloudpickleWrapper(object):
8 | def __init__(self, fn):
9 | self.fn = fn
10 |
11 | def __getstate__(self):
12 | import cloudpickle
13 |
14 | return cloudpickle.dumps(self.fn)
15 |
16 | def __setstate__(self, ob):
17 | import pickle
18 |
19 | self.fn = pickle.loads(ob)
20 |
21 | def __call__(self):
22 | return self.fn()
23 |
24 |
25 | @contextlib.contextmanager
26 | def clear_mpi_env_vars():
27 | """
28 | `from mpi4py import MPI` will call `MPI_Init` by default. If the child
29 | process has MPI environment variables, MPI will think that the child process
30 | is an MPI process just like the parent and do bad things such as hang.
31 |
32 | This context manager is a hacky way to clear those environment variables
33 | temporarily such as when we are starting multiprocessing Processes.
34 | """
35 | removed_environment = {}
36 | for k, v in list(os.environ.items()):
37 | for prefix in ["OMPI_", "PMI_"]:
38 | if k.startswith(prefix):
39 | removed_environment[k] = v
40 | del os.environ[k]
41 | try:
42 | yield
43 | finally:
44 | os.environ.update(removed_environment)
45 |
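A usage sketch for `clear_mpi_env_vars` (the worker function is hypothetical): wrap process creation so the child does not inherit `OMPI_`/`PMI_` variables and mistake itself for an MPI rank.

```python
from multiprocessing import Process

from gym.vector.utils import clear_mpi_env_vars


def rollout_worker():
    # placeholder worker body
    pass


# Strip MPI-related environment variables only while the child is spawned.
with clear_mpi_env_vars():
    p = Process(target=rollout_worker)
    p.start()
p.join()
```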
--------------------------------------------------------------------------------
/envs/gym/gym/version.py:
--------------------------------------------------------------------------------
1 | VERSION = "0.21.0"
2 |
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/README.md:
--------------------------------------------------------------------------------
1 | # Wrappers
2 |
3 | Wrappers are used to transform an environment in a modular way:
4 |
5 | ```python
6 | env = gym.make('Pong-v0')
7 | env = MyWrapper(env)
8 | ```
9 |
10 | Note that we may later restructure any of the files in this directory,
11 | but will keep the wrappers importable from the top-level `gym.wrappers`
12 | package. So, for example, you should access `MyWrapper` as follows:
13 |
14 | ```python
15 | from gym.wrappers import MyWrapper
16 | ```
17 |
18 | ## Quick tips for writing your own wrapper
19 |
20 | - Don't forget to call `super(class_name, self).__init__(env)` if you override the wrapper's `__init__` function
21 | - You can access the inner environment with `self.unwrapped`
22 | - You can access the previous layer using `self.env`
23 | - The variables `metadata`, `action_space`, `observation_space`, `reward_range`, and `spec` are copied to `self` from the previous layer
24 | - Override at least one of the following: `__init__(self, env)`, `step`, `reset`, `render`, `close`, or `seed`
25 | - Your overridden method should take its input from the previous layer (`self.env`) and/or the inner layer (`self.unwrapped`); see the example below
26 |
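27 | For example, here is a minimal sketch of a custom reward-scaling wrapper that
28 | follows these tips (`ScaleReward` is an illustrative name, not a wrapper shipped
29 | with Gym):
30 | 
31 | ```python
32 | import gym
33 | 
34 | 
35 | class ScaleReward(gym.RewardWrapper):
36 |     def __init__(self, env, scale=0.01):
37 |         # Always pass the wrapped env to the parent constructor
38 |         super(ScaleReward, self).__init__(env)
39 |         self.scale = scale
40 | 
41 |     def reward(self, reward):
42 |         # RewardWrapper calls this with the reward returned by the
43 |         # previous layer (self.env); return the transformed value.
44 |         return self.scale * reward
45 | 
46 | 
47 | env = ScaleReward(gym.make('Pong-v0'), scale=0.01)
48 | ```
49 | 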
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/__init__.py:
--------------------------------------------------------------------------------
1 | from gym import error
2 | from gym.wrappers.monitor import Monitor
3 | from gym.wrappers.time_limit import TimeLimit
4 | from gym.wrappers.filter_observation import FilterObservation
5 | from gym.wrappers.atari_preprocessing import AtariPreprocessing
6 | from gym.wrappers.time_aware_observation import TimeAwareObservation
7 | from gym.wrappers.rescale_action import RescaleAction
8 | from gym.wrappers.flatten_observation import FlattenObservation
9 | from gym.wrappers.gray_scale_observation import GrayScaleObservation
10 | from gym.wrappers.frame_stack import LazyFrames
11 | from gym.wrappers.frame_stack import FrameStack
12 | from gym.wrappers.transform_observation import TransformObservation
13 | from gym.wrappers.transform_reward import TransformReward
14 | from gym.wrappers.resize_observation import ResizeObservation
15 | from gym.wrappers.clip_action import ClipAction
16 | from gym.wrappers.record_episode_statistics import RecordEpisodeStatistics
17 | from gym.wrappers.normalize import NormalizeObservation, NormalizeReward
18 | from gym.wrappers.record_video import RecordVideo, capped_cubic_video_schedule
19 |
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/clip_action.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import ActionWrapper
3 | from gym.spaces import Box
4 |
5 |
6 | class ClipAction(ActionWrapper):
7 |     r"""Clip the continuous action within the valid bounds."""
8 |
9 | def __init__(self, env):
10 | assert isinstance(env.action_space, Box)
11 | super(ClipAction, self).__init__(env)
12 |
13 | def action(self, action):
14 | return np.clip(action, self.action_space.low, self.action_space.high)
15 |
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/filter_observation.py:
--------------------------------------------------------------------------------
1 | import copy
2 | from gym import spaces
3 | from gym import ObservationWrapper
4 |
5 |
6 | class FilterObservation(ObservationWrapper):
7 | """Filter dictionary observations by their keys.
8 |
9 | Args:
10 | env: The environment to wrap.
11 | filter_keys: List of keys to be included in the observations.
12 |
13 | Raises:
14 |         ValueError: If `filter_keys` is not None or an
15 |             iterable.
16 |         ValueError: If any of the `filter_keys` are not included in
17 |             the original `env`'s observation space.
18 |
19 | """
20 |
21 | def __init__(self, env, filter_keys=None):
22 | super(FilterObservation, self).__init__(env)
23 |
24 | wrapped_observation_space = env.observation_space
25 | assert isinstance(
26 | wrapped_observation_space, spaces.Dict
27 | ), "FilterObservationWrapper is only usable with dict observations."
28 |
29 | observation_keys = wrapped_observation_space.spaces.keys()
30 |
31 | if filter_keys is None:
32 | filter_keys = tuple(observation_keys)
33 |
34 | missing_keys = set(key for key in filter_keys if key not in observation_keys)
35 |
36 | if missing_keys:
37 | raise ValueError(
38 | "All the filter_keys must be included in the "
39 |                 "original observation space.\n"
40 | "Filter keys: {filter_keys}\n"
41 | "Observation keys: {observation_keys}\n"
42 | "Missing keys: {missing_keys}".format(
43 | filter_keys=filter_keys,
44 | observation_keys=observation_keys,
45 | missing_keys=missing_keys,
46 | )
47 | )
48 |
49 | self.observation_space = type(wrapped_observation_space)(
50 | [
51 | (name, copy.deepcopy(space))
52 | for name, space in wrapped_observation_space.spaces.items()
53 | if name in filter_keys
54 | ]
55 | )
56 |
57 | self._env = env
58 | self._filter_keys = tuple(filter_keys)
59 |
60 | def observation(self, observation):
61 | filter_observation = self._filter_observation(observation)
62 | return filter_observation
63 |
64 | def _filter_observation(self, observation):
65 | observation = type(observation)(
66 | [
67 | (name, value)
68 | for name, value in observation.items()
69 | if name in self._filter_keys
70 | ]
71 | )
72 | return observation
73 |
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/flatten_observation.py:
--------------------------------------------------------------------------------
1 | import gym.spaces as spaces
2 | from gym import ObservationWrapper
3 |
4 |
5 | class FlattenObservation(ObservationWrapper):
6 | r"""Observation wrapper that flattens the observation."""
7 |
8 | def __init__(self, env):
9 | super(FlattenObservation, self).__init__(env)
10 | self.observation_space = spaces.flatten_space(env.observation_space)
11 |
12 | def observation(self, observation):
13 | return spaces.flatten(self.env.observation_space, observation)
14 |
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/gray_scale_observation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym.spaces import Box
3 | from gym import ObservationWrapper
4 |
5 |
6 | class GrayScaleObservation(ObservationWrapper):
7 | r"""Convert the image observation from RGB to gray scale."""
8 |
9 | def __init__(self, env, keep_dim=False):
10 | super(GrayScaleObservation, self).__init__(env)
11 | self.keep_dim = keep_dim
12 |
13 | assert (
14 | len(env.observation_space.shape) == 3
15 | and env.observation_space.shape[-1] == 3
16 | )
17 |
18 | obs_shape = self.observation_space.shape[:2]
19 | if self.keep_dim:
20 | self.observation_space = Box(
21 | low=0, high=255, shape=(obs_shape[0], obs_shape[1], 1), dtype=np.uint8
22 | )
23 | else:
24 | self.observation_space = Box(
25 | low=0, high=255, shape=obs_shape, dtype=np.uint8
26 | )
27 |
28 | def observation(self, observation):
29 | import cv2
30 |
31 | observation = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY)
32 | if self.keep_dim:
33 | observation = np.expand_dims(observation, -1)
34 | return observation
35 |
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/monitoring/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/gym/wrappers/monitoring/__init__.py
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/order_enforcing.py:
--------------------------------------------------------------------------------
1 | import gym
2 |
3 |
4 | class OrderEnforcing(gym.Wrapper):
5 | def __init__(self, env):
6 | super(OrderEnforcing, self).__init__(env)
7 | self._has_reset = False
8 |
9 | def step(self, action):
10 | assert self._has_reset, "Cannot call env.step() before calling reset()"
11 | observation, reward, done, info = self.env.step(action)
12 | return observation, reward, done, info
13 |
14 | def reset(self, **kwargs):
15 | self._has_reset = True
16 | return self.env.reset(**kwargs)
17 |
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/record_episode_statistics.py:
--------------------------------------------------------------------------------
1 | import time
2 | from collections import deque
3 | import numpy as np
4 | import gym
5 |
6 |
7 | class RecordEpisodeStatistics(gym.Wrapper):
8 | def __init__(self, env, deque_size=100):
9 | super(RecordEpisodeStatistics, self).__init__(env)
10 | self.num_envs = getattr(env, "num_envs", 1)
11 | self.t0 = time.perf_counter()
12 | self.episode_count = 0
13 | self.episode_returns = None
14 | self.episode_lengths = None
15 | self.return_queue = deque(maxlen=deque_size)
16 | self.length_queue = deque(maxlen=deque_size)
17 | self.is_vector_env = getattr(env, "is_vector_env", False)
18 |
19 | def reset(self, **kwargs):
20 | observations = super(RecordEpisodeStatistics, self).reset(**kwargs)
21 | self.episode_returns = np.zeros(self.num_envs, dtype=np.float32)
22 | self.episode_lengths = np.zeros(self.num_envs, dtype=np.int32)
23 | return observations
24 |
25 | def step(self, action):
26 | observations, rewards, dones, infos = super(RecordEpisodeStatistics, self).step(
27 | action
28 | )
29 | self.episode_returns += rewards
30 | self.episode_lengths += 1
31 | if not self.is_vector_env:
32 | infos = [infos]
33 | dones = [dones]
34 | for i in range(len(dones)):
35 | if dones[i]:
36 | infos[i] = infos[i].copy()
37 | episode_return = self.episode_returns[i]
38 | episode_length = self.episode_lengths[i]
39 | episode_info = {
40 | "r": episode_return,
41 | "l": episode_length,
42 | "t": round(time.perf_counter() - self.t0, 6),
43 | }
44 | infos[i]["episode"] = episode_info
45 | self.return_queue.append(episode_return)
46 | self.length_queue.append(episode_length)
47 | self.episode_count += 1
48 | self.episode_returns[i] = 0
49 | self.episode_lengths[i] = 0
50 | return (
51 | observations,
52 | rewards,
53 | dones if self.is_vector_env else dones[0],
54 | infos if self.is_vector_env else infos[0],
55 | )
56 |
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/rescale_action.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import gym
3 | from gym import spaces
4 |
5 |
6 | class RescaleAction(gym.ActionWrapper):
7 | r"""Rescales the continuous action space of the environment to a range [min_action, max_action].
8 |
9 | Example::
10 |
11 | >>> RescaleAction(env, min_action, max_action).action_space == Box(min_action, max_action)
12 | True
13 |
14 | """
15 |
16 | def __init__(self, env, min_action, max_action):
17 | assert isinstance(
18 | env.action_space, spaces.Box
19 | ), "expected Box action space, got {}".format(type(env.action_space))
20 | assert np.less_equal(min_action, max_action).all(), (min_action, max_action)
21 |
22 | super(RescaleAction, self).__init__(env)
23 | self.min_action = (
24 | np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + min_action
25 | )
26 | self.max_action = (
27 | np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + max_action
28 | )
29 | self.action_space = spaces.Box(
30 | low=min_action,
31 | high=max_action,
32 | shape=env.action_space.shape,
33 | dtype=env.action_space.dtype,
34 | )
35 |
36 | def action(self, action):
37 | assert np.all(np.greater_equal(action, self.min_action)), (
38 | action,
39 | self.min_action,
40 | )
41 | assert np.all(np.less_equal(action, self.max_action)), (action, self.max_action)
42 | low = self.env.action_space.low
43 | high = self.env.action_space.high
44 | action = low + (high - low) * (
45 | (action - self.min_action) / (self.max_action - self.min_action)
46 | )
47 | action = np.clip(action, low, high)
48 | return action
49 |
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/resize_observation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym.spaces import Box
3 | from gym import ObservationWrapper
4 |
5 |
6 | class ResizeObservation(ObservationWrapper):
7 |     r"""Downsample the image observation to the given shape (square if an int is passed)."""
8 |
9 | def __init__(self, env, shape):
10 | super(ResizeObservation, self).__init__(env)
11 | if isinstance(shape, int):
12 | shape = (shape, shape)
13 | assert all(x > 0 for x in shape), shape
14 |
15 | self.shape = tuple(shape)
16 |
17 | obs_shape = self.shape + self.observation_space.shape[2:]
18 | self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8)
19 |
20 | def observation(self, observation):
21 | import cv2
22 |
23 | observation = cv2.resize(
24 | observation, self.shape[::-1], interpolation=cv2.INTER_AREA
25 | )
26 | if observation.ndim == 2:
27 | observation = np.expand_dims(observation, -1)
28 | return observation
29 |
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/time_aware_observation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym.spaces import Box
3 | from gym import ObservationWrapper
4 |
5 |
6 | class TimeAwareObservation(ObservationWrapper):
7 | r"""Augment the observation with current time step in the trajectory.
8 |
9 | .. note::
10 |         Currently it only works with one-dimensional observation spaces. It doesn't
11 |         support pixel observation spaces yet.
12 |
13 | """
14 |
15 | def __init__(self, env):
16 | super(TimeAwareObservation, self).__init__(env)
17 | assert isinstance(env.observation_space, Box)
18 | assert env.observation_space.dtype == np.float32
19 | low = np.append(self.observation_space.low, 0.0)
20 | high = np.append(self.observation_space.high, np.inf)
21 | self.observation_space = Box(low, high, dtype=np.float32)
22 |
23 | def observation(self, observation):
24 | return np.append(observation, self.t)
25 |
26 | def step(self, action):
27 | self.t += 1
28 | return super(TimeAwareObservation, self).step(action)
29 |
30 | def reset(self, **kwargs):
31 | self.t = 0
32 | return super(TimeAwareObservation, self).reset(**kwargs)
33 |
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/time_limit.py:
--------------------------------------------------------------------------------
1 | import gym
2 |
3 |
4 | class TimeLimit(gym.Wrapper):
5 | def __init__(self, env, max_episode_steps=None):
6 | super(TimeLimit, self).__init__(env)
7 | if max_episode_steps is None and self.env.spec is not None:
8 | max_episode_steps = env.spec.max_episode_steps
9 | if self.env.spec is not None:
10 | self.env.spec.max_episode_steps = max_episode_steps
11 | self._max_episode_steps = max_episode_steps
12 | self._elapsed_steps = None
13 |
14 | def step(self, action):
15 | assert (
16 | self._elapsed_steps is not None
17 | ), "Cannot call env.step() before calling reset()"
18 | observation, reward, done, info = self.env.step(action)
19 | self._elapsed_steps += 1
20 | if self._elapsed_steps >= self._max_episode_steps:
21 | info["TimeLimit.truncated"] = not done
22 | done = True
23 | return observation, reward, done, info
24 |
25 | def reset(self, **kwargs):
26 | self._elapsed_steps = 0
27 | return self.env.reset(**kwargs)
28 |
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/transform_observation.py:
--------------------------------------------------------------------------------
1 | from gym import ObservationWrapper
2 |
3 |
4 | class TransformObservation(ObservationWrapper):
5 | r"""Transform the observation via an arbitrary function.
6 |
7 | Example::
8 |
9 | >>> import gym
10 | >>> env = gym.make('CartPole-v1')
11 | >>> env = TransformObservation(env, lambda obs: obs + 0.1*np.random.randn(*obs.shape))
12 | >>> env.reset()
13 | array([-0.08319338, 0.04635121, -0.07394746, 0.20877492])
14 |
15 | Args:
16 | env (Env): environment
17 | f (callable): a function that transforms the observation
18 |
19 | """
20 |
21 | def __init__(self, env, f):
22 | super(TransformObservation, self).__init__(env)
23 | assert callable(f)
24 | self.f = f
25 |
26 | def observation(self, observation):
27 | return self.f(observation)
28 |
--------------------------------------------------------------------------------
/envs/gym/gym/wrappers/transform_reward.py:
--------------------------------------------------------------------------------
1 | from gym import RewardWrapper
2 |
3 |
4 | class TransformReward(RewardWrapper):
5 | r"""Transform the reward via an arbitrary function.
6 |
7 | Example::
8 |
9 | >>> import gym
10 | >>> env = gym.make('CartPole-v1')
11 | >>> env = TransformReward(env, lambda r: 0.01*r)
12 | >>> env.reset()
13 | >>> observation, reward, done, info = env.step(env.action_space.sample())
14 | >>> reward
15 | 0.01
16 |
17 | Args:
18 | env (Env): environment
19 | f (callable): a function that transforms the reward
20 |
21 | """
22 |
23 | def __init__(self, env, f):
24 | super(TransformReward, self).__init__(env)
25 | assert callable(f)
26 | self.f = f
27 |
28 | def reward(self, reward):
29 | return self.f(reward)
30 |
--------------------------------------------------------------------------------
/envs/gym/py.Dockerfile:
--------------------------------------------------------------------------------
1 | # A Dockerfile that sets up a full Gym install with test dependencies
2 | ARG PYTHON_VERSION
3 | FROM python:$PYTHON_VERSION
4 | RUN apt-get -y update && apt-get install -y unzip libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg cmake swig
5 |
6 | # Download mujoco
7 | RUN mkdir /root/.mujoco && \
8 | cd /root/.mujoco && \
9 | curl -O https://www.roboti.us/download/mjpro150_linux.zip && \
10 | unzip mjpro150_linux.zip && \
11 | echo DUMMY_KEY > /root/.mujoco/mjkey.txt
12 |
13 | ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mjpro150/bin
14 |
15 | COPY . /usr/local/gym/
16 | WORKDIR /usr/local/gym/
17 |
18 | RUN pip install .[nomujoco,accept-rom-license] && pip install -r test_requirements.txt
19 |
20 | ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"]
21 |
--------------------------------------------------------------------------------
/envs/gym/requirements.txt:
--------------------------------------------------------------------------------
1 | ale-py~=0.7
2 | opencv-python>=3.
3 | box2d-py==2.3.5
4 | mujoco_py>=1.50, <2.0
5 | scipy>=1.4.1
6 | numpy>=1.18.0
7 | pyglet>=1.4.0
8 | cloudpickle>=1.2.0
9 | lz4>=3.1.0
10 |
--------------------------------------------------------------------------------
/envs/gym/setup.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import sys
3 | import itertools
4 |
5 | from setuptools import find_packages, setup
6 |
7 | # Don't import gym module here, since deps may not be installed
8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "gym"))
9 | from version import VERSION
10 |
11 | # Environment-specific dependencies.
12 | extras = {
13 | "atari": ["ale-py~=0.7.1"],
14 | "accept-rom-license": ["autorom[accept-rom-license]~=0.4.2"],
15 | "box2d": ["box2d-py==2.3.5", "pyglet>=1.4.0"],
16 | "classic_control": ["pyglet>=1.4.0"],
17 | "mujoco": ["mujoco_py>=1.50, <2.0"],
18 | "robotics": ["mujoco_py>=1.50, <2.0"],
19 | "toy_text": ["scipy>=1.4.1"],
20 | "other": ["lz4>=3.1.0", "opencv-python>=3."],
21 | }
22 |
23 | # Meta dependency groups.
24 | nomujoco_blacklist = set(["mujoco", "robotics", "accept-rom-license"])
25 | nomujoco_groups = set(extras.keys()) - nomujoco_blacklist
26 |
27 | extras["nomujoco"] = list(
28 | itertools.chain.from_iterable(map(lambda group: extras[group], nomujoco_groups))
29 | )
30 |
31 |
32 | all_blacklist = set(["accept-rom-license"])
33 | all_groups = set(extras.keys()) - all_blacklist
34 |
35 | extras["all"] = list(
36 | itertools.chain.from_iterable(map(lambda group: extras[group], all_groups))
37 | )
38 |
39 | setup(
40 | name="gym",
41 | version=VERSION,
42 | description="Gym: A universal API for reinforcement learning environments.",
43 | url="https://github.com/openai/gym",
44 | author="OpenAI",
45 | author_email="jkterry@umd.edu",
46 | license="",
47 | packages=[package for package in find_packages() if package.startswith("gym")],
48 | zip_safe=False,
49 | install_requires=[
50 | "numpy>=1.18.0",
51 | "cloudpickle>=1.2.0",
52 | "importlib_metadata>=4.8.1; python_version < '3.8'",
53 | ],
54 | extras_require=extras,
55 | package_data={
56 | "gym": [
57 | "envs/mujoco/assets/*.xml",
58 | "envs/classic_control/assets/*.png",
59 | "envs/robotics/assets/LICENSE.md",
60 | "envs/robotics/assets/fetch/*.xml",
61 | "envs/robotics/assets/hand/*.xml",
62 | "envs/robotics/assets/stls/fetch/*.stl",
63 | "envs/robotics/assets/stls/hand/*.stl",
64 | "envs/robotics/assets/textures/*.png",
65 | ]
66 | },
67 | tests_require=["pytest", "mock"],
68 | python_requires=">=3.6",
69 | classifiers=[
70 | "Programming Language :: Python :: 3",
71 | "Programming Language :: Python :: 3.6",
72 | "Programming Language :: Python :: 3.7",
73 | "Programming Language :: Python :: 3.8",
74 | "Programming Language :: Python :: 3.9",
75 | ],
76 | )
77 |
--------------------------------------------------------------------------------
/envs/gym/test_requirements.txt:
--------------------------------------------------------------------------------
1 | lz4~=3.1
2 | pytest~=6.2
3 | pytest-forked~=1.3
4 |
--------------------------------------------------------------------------------
/envs/gym/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/__init__.py
--------------------------------------------------------------------------------
/envs/gym/tests/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/envs/__init__.py
--------------------------------------------------------------------------------
/envs/gym/tests/envs/robotics/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/envs/robotics/__init__.py
--------------------------------------------------------------------------------
/envs/gym/tests/envs/robotics/hand/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/envs/robotics/hand/__init__.py
--------------------------------------------------------------------------------
/envs/gym/tests/envs/robotics/hand/test_manipulate.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | import pytest
4 |
5 | from gym import envs
6 | from tests.envs.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE
7 |
8 |
9 | ENVIRONMENT_IDS = (
10 | "HandManipulateEgg-v0",
11 | "HandManipulatePen-v0",
12 | "HandManipulateBlock-v0",
13 | )
14 |
15 |
16 | @pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE)
17 | @pytest.mark.parametrize("environment_id", ENVIRONMENT_IDS)
18 | def test_serialize_deserialize(environment_id):
19 | env1 = envs.make(environment_id, target_position="fixed")
20 | env1.reset()
21 | env2 = pickle.loads(pickle.dumps(env1))
22 |
23 | assert env1.target_position == env2.target_position, (
24 | env1.target_position,
25 | env2.target_position,
26 | )
27 |
--------------------------------------------------------------------------------
/envs/gym/tests/envs/robotics/hand/test_manipulate_touch_sensors.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | import pytest
4 |
5 | from gym import envs
6 | from tests.envs.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE
7 |
8 |
9 | ENVIRONMENT_IDS = (
10 | "HandManipulateEggTouchSensors-v1",
11 | "HandManipulatePenTouchSensors-v0",
12 | "HandManipulateBlockTouchSensors-v0",
13 | )
14 |
15 |
16 | @pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE)
17 | @pytest.mark.parametrize("environment_id", ENVIRONMENT_IDS)
18 | def test_serialize_deserialize(environment_id):
19 | env1 = envs.make(environment_id, target_position="fixed")
20 | env1.reset()
21 | env2 = pickle.loads(pickle.dumps(env1))
22 |
23 | assert env1.target_position == env2.target_position, (
24 | env1.target_position,
25 | env2.target_position,
26 | )
27 |
--------------------------------------------------------------------------------
/envs/gym/tests/envs/robotics/hand/test_reach.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | import pytest
4 |
5 | from gym import envs
6 | from tests.envs.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE
7 |
8 |
9 | @pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE)
10 | def test_serialize_deserialize():
11 | env1 = envs.make("HandReach-v0", distance_threshold=1e-6)
12 | env1.reset()
13 | env2 = pickle.loads(pickle.dumps(env1))
14 |
15 | assert env1.distance_threshold == env2.distance_threshold, (
16 | env1.distance_threshold,
17 | env2.distance_threshold,
18 | )
19 |
--------------------------------------------------------------------------------
/envs/gym/tests/envs/rollout.json:
--------------------------------------------------------------------------------
1 | {}
--------------------------------------------------------------------------------
/envs/gym/tests/envs/spec_list.py:
--------------------------------------------------------------------------------
1 | from gym import envs, logger
2 | import os
3 |
4 |
5 | SKIP_MUJOCO_WARNING_MESSAGE = (
6 |     "Cannot run mujoco test (either license key not found or mujoco not "
7 | "installed properly)."
8 | )
9 |
10 |
11 | skip_mujoco = not (os.environ.get("MUJOCO_KEY"))
12 | if not skip_mujoco:
13 | try:
14 | import mujoco_py
15 | except ImportError:
16 | skip_mujoco = True
17 |
18 |
19 | def should_skip_env_spec_for_tests(spec):
20 | # We skip tests for envs that require dependencies or are otherwise
21 | # troublesome to run frequently
22 | ep = spec.entry_point
23 | # Skip mujoco tests for pull request CI
24 | if skip_mujoco and (
25 | ep.startswith("gym.envs.mujoco") or ep.startswith("gym.envs.robotics:")
26 | ):
27 | return True
28 | try:
29 | import gym.envs.atari
30 | except ImportError:
31 | if ep.startswith("gym.envs.atari"):
32 | return True
33 | try:
34 | import Box2D
35 | except ImportError:
36 | if ep.startswith("gym.envs.box2d"):
37 | return True
38 |
39 | if (
40 | "GoEnv" in ep
41 | or "HexEnv" in ep
42 | or (
43 | ep.startswith("gym.envs.atari")
44 | and not spec.id.startswith("Pong")
45 | and not spec.id.startswith("Seaquest")
46 | )
47 | ):
48 | logger.warn("Skipping tests for env {}".format(ep))
49 | return True
50 | return False
51 |
52 |
53 | spec_list = [
54 | spec
55 | for spec in sorted(envs.registry.all(), key=lambda x: x.id)
56 | if spec.entry_point is not None and not should_skip_env_spec_for_tests(spec)
57 | ]
58 |
--------------------------------------------------------------------------------
/envs/gym/tests/envs/test_frozenlake_dfs.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from gym.envs.toy_text.frozen_lake import generate_random_map
5 |
6 |
7 | # Test that FrozenLake map generation creates valid maps of various sizes.
8 | def test_frozenlake_dfs_map_generation():
9 | def frozenlake_dfs_path_exists(res):
10 | frontier, discovered = [], set()
11 | frontier.append((0, 0))
12 | while frontier:
13 | r, c = frontier.pop()
14 |             if (r, c) not in discovered:
15 | discovered.add((r, c))
16 | directions = [(1, 0), (0, 1), (-1, 0), (0, -1)]
17 | for x, y in directions:
18 | r_new = r + x
19 | c_new = c + y
20 | if r_new < 0 or r_new >= size or c_new < 0 or c_new >= size:
21 | continue
22 | if res[r_new][c_new] == "G":
23 | return True
24 | if res[r_new][c_new] not in "#H":
25 | frontier.append((r_new, c_new))
26 | return False
27 |
28 | map_sizes = [5, 10, 200]
29 | for size in map_sizes:
30 | new_frozenlake = generate_random_map(size)
31 | assert len(new_frozenlake) == size
32 | assert len(new_frozenlake[0]) == size
33 | assert frozenlake_dfs_path_exists(new_frozenlake)
34 |
--------------------------------------------------------------------------------
/envs/gym/tests/envs/test_lunar_lander.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | try:
4 | import Box2D
5 | from gym.envs.box2d.lunar_lander import (
6 | LunarLander,
7 | LunarLanderContinuous,
8 | demo_heuristic_lander,
9 | )
10 | except ImportError:
11 | Box2D = None
12 |
13 |
14 | @pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
15 | def test_lunar_lander():
16 | _test_lander(LunarLander(), seed=0)
17 |
18 |
19 | @pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
20 | def test_lunar_lander_continuous():
21 | _test_lander(LunarLanderContinuous(), seed=0)
22 |
23 |
24 | @pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
25 | def _test_lander(env, seed=None, render=False):
26 | total_reward = demo_heuristic_lander(env, seed=seed, render=render)
27 | assert total_reward > 100
28 |
--------------------------------------------------------------------------------
/envs/gym/tests/envs/test_mujoco_v2_to_v3_conversion.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import numpy as np
3 | from gym import envs
4 | from tests.envs.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE
5 |
6 |
7 | def verify_environments_match(
8 | old_environment_id, new_environment_id, seed=1, num_actions=1000
9 | ):
10 | old_environment = envs.make(old_environment_id)
11 | new_environment = envs.make(new_environment_id)
12 |
13 | old_environment.seed(seed)
14 | new_environment.seed(seed)
15 |
16 | old_reset_observation = old_environment.reset()
17 | new_reset_observation = new_environment.reset()
18 |
19 | np.testing.assert_allclose(old_reset_observation, new_reset_observation)
20 |
21 | for i in range(num_actions):
22 | action = old_environment.action_space.sample()
23 | old_observation, old_reward, old_done, old_info = old_environment.step(action)
24 | new_observation, new_reward, new_done, new_info = new_environment.step(action)
25 |
26 | eps = 1e-6
27 | np.testing.assert_allclose(old_observation, new_observation, atol=eps)
28 | np.testing.assert_allclose(old_reward, new_reward, atol=eps)
29 | np.testing.assert_allclose(old_done, new_done, atol=eps)
30 |
31 | for key in old_info:
32 | np.testing.assert_allclose(old_info[key], new_info[key], atol=eps)
33 |
34 |
35 | @unittest.skipIf(skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE)
36 | class Mujocov2Tov3ConversionTest(unittest.TestCase):
37 | def test_environments_match(self):
38 | test_cases = (
39 | {"old_id": "Swimmer-v2", "new_id": "Swimmer-v3"},
40 | {"old_id": "Hopper-v2", "new_id": "Hopper-v3"},
41 | {"old_id": "Walker2d-v2", "new_id": "Walker2d-v3"},
42 | {"old_id": "HalfCheetah-v2", "new_id": "HalfCheetah-v3"},
43 | {"old_id": "Ant-v2", "new_id": "Ant-v3"},
44 | {"old_id": "Humanoid-v2", "new_id": "Humanoid-v3"},
45 | )
46 |
47 | for test_case in test_cases:
48 | verify_environments_match(test_case["old_id"], test_case["new_id"])
49 |
50 | # Raises KeyError because the new envs have extra info
51 | with self.assertRaises(KeyError):
52 | verify_environments_match("Swimmer-v3", "Swimmer-v2")
53 |
54 | # Raises KeyError because the new envs have extra info
55 | with self.assertRaises(KeyError):
56 | verify_environments_match("Humanoid-v3", "Humanoid-v2")
57 |
58 | # Raises KeyError because the new envs have extra info
59 | with self.assertRaises(KeyError):
60 | verify_environments_match("Swimmer-v3", "Swimmer-v2")
61 |
62 |
63 | if __name__ == "__main__":
64 | unittest.main()
65 |
--------------------------------------------------------------------------------
/envs/gym/tests/envs/test_registration.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import gym
3 | from gym import error, envs
4 | from gym.envs import registration
5 | from gym.envs.classic_control import cartpole
6 |
7 |
8 | class ArgumentEnv(gym.Env):
9 | def __init__(self, arg1, arg2, arg3):
10 | self.arg1 = arg1
11 | self.arg2 = arg2
12 | self.arg3 = arg3
13 |
14 |
15 | gym.register(
16 | id="test.ArgumentEnv-v0",
17 | entry_point="tests.envs.test_registration:ArgumentEnv",
18 | kwargs={
19 | "arg1": "arg1",
20 | "arg2": "arg2",
21 | },
22 | )
23 |
24 |
25 | def test_make():
26 | env = envs.make("CartPole-v0")
27 | assert env.spec.id == "CartPole-v0"
28 | assert isinstance(env.unwrapped, cartpole.CartPoleEnv)
29 |
30 |
31 | def test_make_with_kwargs():
32 | env = envs.make("test.ArgumentEnv-v0", arg2="override_arg2", arg3="override_arg3")
33 | assert env.spec.id == "test.ArgumentEnv-v0"
34 | assert isinstance(env.unwrapped, ArgumentEnv)
35 | assert env.arg1 == "arg1"
36 | assert env.arg2 == "override_arg2"
37 | assert env.arg3 == "override_arg3"
38 |
39 |
40 | def test_make_deprecated():
41 | try:
42 | envs.make("Humanoid-v0")
43 | except error.Error:
44 | pass
45 | else:
46 | assert False
47 |
48 |
49 | def test_spec():
50 | spec = envs.spec("CartPole-v0")
51 | assert spec.id == "CartPole-v0"
52 |
53 |
54 | def test_spec_with_kwargs():
55 | map_name_value = "8x8"
56 | env = gym.make("FrozenLake-v1", map_name=map_name_value)
57 | assert env.spec._kwargs["map_name"] == map_name_value
58 |
59 |
60 | def test_missing_lookup():
61 | registry = registration.EnvRegistry()
62 | registry.register(id="Test-v0", entry_point=None)
63 | registry.register(id="Test-v15", entry_point=None)
64 | registry.register(id="Test-v9", entry_point=None)
65 | registry.register(id="Other-v100", entry_point=None)
66 | try:
67 | registry.spec("Test-v1") # must match an env name but not the version above
68 | except error.DeprecatedEnv:
69 | pass
70 | else:
71 | assert False
72 |
73 | try:
74 | registry.spec("Unknown-v1")
75 | except error.UnregisteredEnv:
76 | pass
77 | else:
78 | assert False
79 |
80 |
81 | def test_malformed_lookup():
82 | registry = registration.EnvRegistry()
83 | try:
84 | registry.spec(u"“Breakout-v0”")
85 | except error.Error as e:
86 | assert "malformed environment ID" in "{}".format(
87 | e
88 | ), "Unexpected message: {}".format(e)
89 | else:
90 | assert False
91 |
--------------------------------------------------------------------------------
/envs/gym/tests/spaces/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/spaces/__init__.py
--------------------------------------------------------------------------------
/envs/gym/tests/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/utils/__init__.py
--------------------------------------------------------------------------------
/envs/gym/tests/utils/test_atexit.py:
--------------------------------------------------------------------------------
1 | from gym.utils.closer import Closer
2 |
3 |
4 | class Closeable(object):
5 | close_called = False
6 |
7 | def close(self):
8 | self.close_called = True
9 |
10 |
11 | def test_register_unregister():
12 | registry = Closer(atexit_register=False)
13 | c1 = Closeable()
14 | c2 = Closeable()
15 |
16 | assert not c1.close_called
17 | assert not c2.close_called
18 | registry.register(c1)
19 | id2 = registry.register(c2)
20 |
21 | registry.unregister(id2)
22 | registry.close()
23 | assert c1.close_called
24 | assert not c2.close_called
25 |
--------------------------------------------------------------------------------
/envs/gym/tests/utils/test_env_checker.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 | import pytest
4 | from gym.spaces import Box, Dict, Discrete
5 |
6 | from gym.utils.env_checker import check_env
7 |
8 |
9 | class ActionDictTestEnv(gym.Env):
10 | action_space = Dict({"position": Discrete(1), "velocity": Discrete(1)})
11 | observation_space = Box(low=-1.0, high=2.0, shape=(3,), dtype=np.float32)
12 |
13 | def step(self, action):
14 | observation = np.array([1.0, 1.5, 0.5])
15 | reward = 1
16 | done = True
17 | return observation, reward, done
18 |
19 | def reset(self):
20 | return np.array([1.0, 1.5, 0.5])
21 |
22 | def render(self, mode="human"):
23 | pass
24 |
25 |
26 | def test_check_env_dict_action():
27 | # Environment.step() only returns 3 values: obs, reward, done. Not info!
28 | test_env = ActionDictTestEnv()
29 |
30 | with pytest.raises(AssertionError) as errorinfo:
31 | check_env(env=test_env, warn=True)
32 | assert (
33 | str(errorinfo.value)
34 | == "The `step()` method must return four values: obs, reward, done, info"
35 | )
36 |
--------------------------------------------------------------------------------
/envs/gym/tests/utils/test_seeding.py:
--------------------------------------------------------------------------------
1 | from gym import error
2 | from gym.utils import seeding
3 |
4 |
5 | def test_invalid_seeds():
6 | for seed in [-1, "test"]:
7 | try:
8 | seeding.np_random(seed)
9 | except error.Error:
10 | pass
11 | else:
12 | assert False, "Invalid seed {} passed validation".format(seed)
13 |
14 |
15 | def test_valid_seeds():
16 | for seed in [0, 1]:
17 | random, seed1 = seeding.np_random(seed)
18 | assert seed == seed1
19 |
--------------------------------------------------------------------------------
/envs/gym/tests/vector/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/vector/__init__.py
--------------------------------------------------------------------------------
/envs/gym/tests/vector/test_vector_env.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from gym.spaces import Tuple
5 | from tests.vector.utils import CustomSpace, make_env
6 |
7 | from gym.vector.async_vector_env import AsyncVectorEnv
8 | from gym.vector.sync_vector_env import SyncVectorEnv
9 | from gym.vector.vector_env import VectorEnv
10 |
11 |
12 | @pytest.mark.parametrize("shared_memory", [True, False])
13 | def test_vector_env_equal(shared_memory):
14 | env_fns = [make_env("CubeCrash-v0", i) for i in range(4)]
15 | num_steps = 100
16 | try:
17 | async_env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
18 | sync_env = SyncVectorEnv(env_fns)
19 |
20 | async_env.seed(0)
21 | sync_env.seed(0)
22 |
23 | assert async_env.num_envs == sync_env.num_envs
24 | assert async_env.observation_space == sync_env.observation_space
25 | assert async_env.single_observation_space == sync_env.single_observation_space
26 | assert async_env.action_space == sync_env.action_space
27 | assert async_env.single_action_space == sync_env.single_action_space
28 |
29 | async_observations = async_env.reset()
30 | sync_observations = sync_env.reset()
31 | assert np.all(async_observations == sync_observations)
32 |
33 | for _ in range(num_steps):
34 | actions = async_env.action_space.sample()
35 | assert actions in sync_env.action_space
36 |
37 | async_observations, async_rewards, async_dones, _ = async_env.step(actions)
38 | sync_observations, sync_rewards, sync_dones, _ = sync_env.step(actions)
39 |
40 | assert np.all(async_observations == sync_observations)
41 | assert np.all(async_rewards == sync_rewards)
42 | assert np.all(async_dones == sync_dones)
43 |
44 | finally:
45 | async_env.close()
46 | sync_env.close()
47 |
48 |
49 | def test_custom_space_vector_env():
50 | env = VectorEnv(4, CustomSpace(), CustomSpace())
51 |
52 | assert isinstance(env.single_observation_space, CustomSpace)
53 | assert isinstance(env.observation_space, Tuple)
54 |
55 | assert isinstance(env.single_action_space, CustomSpace)
56 | assert isinstance(env.action_space, Tuple)
57 |
--------------------------------------------------------------------------------
/envs/gym/tests/vector/test_vector_env_wrapper.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym.vector import make
3 | from gym.vector import VectorEnvWrapper
4 |
5 |
6 | class DummyWrapper(VectorEnvWrapper):
7 | def __init__(self, env):
8 | self.env = env
9 | self.counter = 0
10 |
11 | def reset_async(self):
12 | super().reset_async()
13 | self.counter += 1
14 |
15 |
16 | def test_vector_env_wrapper_inheritance():
17 | env = make("FrozenLake-v1", asynchronous=False)
18 | wrapped = DummyWrapper(env)
19 | wrapped.reset()
20 | assert wrapped.counter == 1
21 |
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/wrappers/__init__.py
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/monitoring/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/gym/tests/wrappers/monitoring/__init__.py
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/monitoring/helpers.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import shutil
3 | import tempfile
4 |
5 |
6 | @contextlib.contextmanager
7 | def tempdir():
8 | temp = tempfile.mkdtemp()
9 | yield temp
10 | shutil.rmtree(temp)
11 |
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/monitoring/test_video_recorder.py:
--------------------------------------------------------------------------------
1 | import gc
2 | import os
3 | import time
4 |
5 | import gym
6 | from gym.wrappers.monitoring.video_recorder import VideoRecorder
7 |
8 |
9 | class BrokenRecordableEnv(object):
10 | metadata = {"render.modes": [None, "rgb_array"]}
11 |
12 | def render(self, mode=None):
13 | pass
14 |
15 |
16 | class UnrecordableEnv(object):
17 | metadata = {"render.modes": [None]}
18 |
19 | def render(self, mode=None):
20 | pass
21 |
22 |
23 | def test_record_simple():
24 | env = gym.make("CartPole-v1")
25 | rec = VideoRecorder(env)
26 | env.reset()
27 | rec.capture_frame()
28 | proc = rec.encoder.proc
29 |
30 | assert proc.poll() is None # subprocess is running
31 |
32 | rec.close()
33 |
34 | assert proc.poll() is not None # subprocess is terminated
35 | assert not rec.empty
36 | assert not rec.broken
37 | assert os.path.exists(rec.path)
38 | f = open(rec.path)
39 | assert os.fstat(f.fileno()).st_size > 100
40 |
41 |
42 | def test_autoclose():
43 | def record():
44 | env = gym.make("CartPole-v1")
45 | rec = VideoRecorder(env)
46 | env.reset()
47 | rec.capture_frame()
48 |
49 | rec_path = rec.path
50 | proc = rec.encoder.proc
51 |
52 | assert proc.poll() is None # subprocess is running
53 |
54 | # The function ends without an explicit `rec.close()` call
55 | # The Python interpreter will implicitly do `del rec` on garbage cleaning
56 | return rec_path, proc
57 |
58 | rec_path, proc = record()
59 |
60 | gc.collect() # do explicit garbage collection for test
61 | time.sleep(5) # wait for subprocess exiting
62 |
63 | assert proc.poll() is not None # subprocess is terminated
64 | assert os.path.exists(rec_path)
65 | f = open(rec_path)
66 | assert os.fstat(f.fileno()).st_size > 100
67 |
68 |
69 | def test_no_frames():
70 | env = BrokenRecordableEnv()
71 | rec = VideoRecorder(env)
72 | rec.close()
73 | assert rec.empty
74 | assert rec.functional
75 | assert not os.path.exists(rec.path)
76 |
77 |
78 | def test_record_unrecordable_method():
79 | env = UnrecordableEnv()
80 | rec = VideoRecorder(env)
81 | assert not rec.enabled
82 | rec.close()
83 |
84 |
85 | def test_record_breaking_render_method():
86 | env = BrokenRecordableEnv()
87 | rec = VideoRecorder(env)
88 | rec.capture_frame()
89 | rec.close()
90 | assert rec.empty
91 | assert rec.broken
92 | assert not os.path.exists(rec.path)
93 |
94 |
95 | def test_text_envs():
96 | env = gym.make("FrozenLake-v1")
97 | video = VideoRecorder(env)
98 | try:
99 | env.reset()
100 | video.capture_frame()
101 | video.close()
102 | finally:
103 | os.remove(video.path)
104 |
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/test_clip_action.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import gym
4 | from gym.wrappers import ClipAction
5 |
6 |
7 | def test_clip_action():
8 | # mountaincar: action-based rewards
9 | make_env = lambda: gym.make("MountainCarContinuous-v0")
10 | env = make_env()
11 | wrapped_env = ClipAction(make_env())
12 |
13 | seed = 0
14 | env.seed(seed)
15 | wrapped_env.seed(seed)
16 |
17 | env.reset()
18 | wrapped_env.reset()
19 |
20 | actions = [[0.4], [1.2], [-0.3], [0.0], [-2.5]]
21 | for action in actions:
22 | obs1, r1, d1, _ = env.step(
23 | np.clip(action, env.action_space.low, env.action_space.high)
24 | )
25 | obs2, r2, d2, _ = wrapped_env.step(action)
26 | assert np.allclose(r1, r2)
27 | assert np.allclose(obs1, obs2)
28 | assert d1 == d2
29 |
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/test_flatten_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import FlattenObservation
7 | from gym import spaces
8 |
9 |
10 | @pytest.mark.parametrize("env_id", ["Blackjack-v1"])
11 | def test_flatten_observation(env_id):
12 | env = gym.make(env_id)
13 | wrapped_env = FlattenObservation(env)
14 |
15 | obs = env.reset()
16 | wrapped_obs = wrapped_env.reset()
17 |
18 | space = spaces.Tuple((spaces.Discrete(32), spaces.Discrete(11), spaces.Discrete(2)))
19 | wrapped_space = spaces.Box(0, 1, [32 + 11 + 2], dtype=np.int64)
20 |
21 | assert space.contains(obs)
22 | assert wrapped_space.contains(wrapped_obs)
23 |
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/test_frame_stack.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | pytest.importorskip("gym.envs.atari")
4 |
5 | import numpy as np
6 | import gym
7 | from gym.wrappers import FrameStack
8 |
9 | try:
10 | import lz4
11 | except ImportError:
12 | lz4 = None
13 |
14 |
15 | @pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1", "Pong-v0"])
16 | @pytest.mark.parametrize("num_stack", [2, 3, 4])
17 | @pytest.mark.parametrize(
18 | "lz4_compress",
19 | [
20 | pytest.param(
21 | True,
22 | marks=pytest.mark.skipif(
23 | lz4 is None, reason="Need lz4 to run tests with compression"
24 | ),
25 | ),
26 | False,
27 | ],
28 | )
29 | def test_frame_stack(env_id, num_stack, lz4_compress):
30 | env = gym.make(env_id)
31 | env.seed(0)
32 | shape = env.observation_space.shape
33 | env = FrameStack(env, num_stack, lz4_compress)
34 | assert env.observation_space.shape == (num_stack,) + shape
35 | assert env.observation_space.dtype == env.env.observation_space.dtype
36 |
37 | dup = gym.make(env_id)
38 | dup.seed(0)
39 |
40 | obs = env.reset()
41 | dup_obs = dup.reset()
42 | assert np.allclose(obs[-1], dup_obs)
43 |
44 | for _ in range(num_stack ** 2):
45 | action = env.action_space.sample()
46 | dup_obs, _, _, _ = dup.step(action)
47 | obs, _, _, _ = env.step(action)
48 | assert np.allclose(obs[-1], dup_obs)
49 |
50 | assert len(obs) == num_stack
51 |
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/test_gray_scale_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import GrayScaleObservation
7 | from gym.wrappers import AtariPreprocessing
8 |
9 | pytest.importorskip("gym.envs.atari")
10 | pytest.importorskip("cv2")
11 |
12 |
13 | @pytest.mark.parametrize(
14 | "env_id", ["PongNoFrameskip-v0", "SpaceInvadersNoFrameskip-v0"]
15 | )
16 | @pytest.mark.parametrize("keep_dim", [True, False])
17 | def test_gray_scale_observation(env_id, keep_dim):
18 | gray_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=True)
19 | rgb_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=False)
20 | wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim)
21 | assert rgb_env.observation_space.shape[-1] == 3
22 |
23 | seed = 0
24 | gray_env.seed(seed)
25 | wrapped_env.seed(seed)
26 |
27 | gray_obs = gray_env.reset()
28 | wrapped_obs = wrapped_env.reset()
29 |
30 | if keep_dim:
31 | assert wrapped_env.observation_space.shape[-1] == 1
32 | assert len(wrapped_obs.shape) == 3
33 | wrapped_obs = wrapped_obs.squeeze(-1)
34 | else:
35 | assert len(wrapped_env.observation_space.shape) == 2
36 | assert len(wrapped_obs.shape) == 2
37 |
38 |     # ALE grayscale conversion is slightly different, but by no more than one shade
39 | assert np.allclose(gray_obs.astype("int32"), wrapped_obs.astype("int32"), atol=1)
40 |
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/test_record_episode_statistics.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import gym
4 | from gym.wrappers import RecordEpisodeStatistics
5 |
6 |
7 | @pytest.mark.parametrize("env_id", ["CartPole-v0", "Pendulum-v1"])
8 | @pytest.mark.parametrize("deque_size", [2, 5])
9 | def test_record_episode_statistics(env_id, deque_size):
10 | env = gym.make(env_id)
11 | env = RecordEpisodeStatistics(env, deque_size)
12 |
13 | for n in range(5):
14 | env.reset()
15 | assert env.episode_returns[0] == 0.0
16 | assert env.episode_lengths[0] == 0
17 | for t in range(env.spec.max_episode_steps):
18 | _, _, done, info = env.step(env.action_space.sample())
19 | if done:
20 | assert "episode" in info
21 | assert all([item in info["episode"] for item in ["r", "l", "t"]])
22 | break
23 | assert len(env.return_queue) == deque_size
24 | assert len(env.length_queue) == deque_size
25 |
26 |
27 | @pytest.mark.parametrize("num_envs", [1, 4])
28 | def test_record_episode_statistics_with_vectorenv(num_envs):
29 | envs = gym.vector.make("CartPole-v0", num_envs=num_envs, asynchronous=False)
30 | envs = RecordEpisodeStatistics(envs)
31 | envs.reset()
32 | for _ in range(envs.env.envs[0].spec.max_episode_steps + 1):
33 | _, _, dones, infos = envs.step(envs.action_space.sample())
34 | for idx, info in enumerate(infos):
35 | if dones[idx]:
36 | assert "episode" in info
37 | assert all([item in info["episode"] for item in ["r", "l", "t"]])
38 | break
39 |
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/test_record_video.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import os
4 | import shutil
5 | import gym
6 | from gym.wrappers import (
7 | RecordEpisodeStatistics,
8 | RecordVideo,
9 | capped_cubic_video_schedule,
10 | )
11 |
12 |
13 | def test_record_video_using_default_trigger():
14 |
15 | env = gym.make("CartPole-v1")
16 | env = gym.wrappers.RecordVideo(env, "videos")
17 | env.reset()
18 | for _ in range(199):
19 | action = env.action_space.sample()
20 | _, _, done, _ = env.step(action)
21 | if done:
22 | env.reset()
23 | env.close()
24 | assert os.path.isdir("videos")
25 | mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")]
26 | assert len(mp4_files) == sum(
27 | [capped_cubic_video_schedule(i) for i in range(env.episode_id + 1)]
28 | )
29 | shutil.rmtree("videos")
30 |
31 |
32 | def test_record_video_step_trigger():
33 | env = gym.make("CartPole-v1")
34 | env._max_episode_steps = 20
35 | env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0)
36 | env.reset()
37 | for _ in range(199):
38 | action = env.action_space.sample()
39 | _, _, done, _ = env.step(action)
40 | if done:
41 | env.reset()
42 | env.close()
43 | assert os.path.isdir("videos")
44 | mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")]
45 | assert len(mp4_files) == 2
46 | shutil.rmtree("videos")
47 |
48 |
49 | def make_env(gym_id, seed):
50 | def thunk():
51 | env = gym.make(gym_id)
52 | env._max_episode_steps = 20
53 | if seed == 1:
54 | env = gym.wrappers.RecordVideo(
55 | env, "videos", step_trigger=lambda x: x % 100 == 0
56 | )
57 | return env
58 |
59 | return thunk
60 |
61 |
62 | def test_record_video_within_vector():
63 | envs = gym.vector.SyncVectorEnv([make_env("CartPole-v1", 1 + i) for i in range(2)])
64 | envs = gym.wrappers.RecordEpisodeStatistics(envs)
65 | envs.reset()
66 | for i in range(199):
67 | _, _, _, infos = envs.step(envs.action_space.sample())
68 | for info in infos:
69 | if "episode" in info.keys():
70 | print(f"episode_reward={info['episode']['r']}")
71 | break
72 | assert os.path.isdir("videos")
73 | mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")]
74 | assert len(mp4_files) == 2
75 | shutil.rmtree("videos")
76 |
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/test_rescale_action.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import RescaleAction
7 |
8 |
9 | def test_rescale_action():
10 | env = gym.make("CartPole-v1")
11 | with pytest.raises(AssertionError):
12 | env = RescaleAction(env, -1, 1)
13 | del env
14 |
15 | env = gym.make("Pendulum-v1")
16 | wrapped_env = RescaleAction(gym.make("Pendulum-v1"), -1, 1)
17 |
18 | seed = 0
19 | env.seed(seed)
20 | wrapped_env.seed(seed)
21 |
22 | obs = env.reset()
23 | wrapped_obs = wrapped_env.reset()
24 | assert np.allclose(obs, wrapped_obs)
25 |
26 | obs, reward, _, _ = env.step([1.5])
27 | with pytest.raises(AssertionError):
28 | wrapped_env.step([1.5])
29 | wrapped_obs, wrapped_reward, _, _ = wrapped_env.step([0.75])
30 |
31 | assert np.allclose(obs, wrapped_obs)
32 | assert np.allclose(reward, wrapped_reward)
33 |
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/test_resize_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import gym
4 | from gym.wrappers import ResizeObservation
5 |
6 | pytest.importorskip("gym.envs.atari")
7 |
8 |
9 | @pytest.mark.parametrize(
10 | "env_id", ["PongNoFrameskip-v0", "SpaceInvadersNoFrameskip-v0"]
11 | )
12 | @pytest.mark.parametrize("shape", [16, 32, (8, 5), [10, 7]])
13 | def test_resize_observation(env_id, shape):
14 | env = gym.make(env_id)
15 | env = ResizeObservation(env, shape)
16 |
17 | assert env.observation_space.shape[-1] == 3
18 | obs = env.reset()
19 | if isinstance(shape, int):
20 | assert env.observation_space.shape[:2] == (shape, shape)
21 | assert obs.shape == (shape, shape, 3)
22 | else:
23 | assert env.observation_space.shape[:2] == tuple(shape)
24 | assert obs.shape == tuple(shape) + (3,)
25 |
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/test_time_aware_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import gym
4 | from gym.wrappers import TimeAwareObservation
5 |
6 |
7 | @pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
8 | def test_time_aware_observation(env_id):
9 | env = gym.make(env_id)
10 | wrapped_env = TimeAwareObservation(env)
11 |
12 | assert wrapped_env.observation_space.shape[0] == env.observation_space.shape[0] + 1
13 |
14 | obs = env.reset()
15 | wrapped_obs = wrapped_env.reset()
16 | assert wrapped_env.t == 0.0
17 | assert wrapped_obs[-1] == 0.0
18 | assert wrapped_obs.shape[0] == obs.shape[0] + 1
19 |
20 | wrapped_obs, _, _, _ = wrapped_env.step(env.action_space.sample())
21 | assert wrapped_env.t == 1.0
22 | assert wrapped_obs[-1] == 1.0
23 | assert wrapped_obs.shape[0] == obs.shape[0] + 1
24 |
25 | wrapped_obs, _, _, _ = wrapped_env.step(env.action_space.sample())
26 | assert wrapped_env.t == 2.0
27 | assert wrapped_obs[-1] == 2.0
28 | assert wrapped_obs.shape[0] == obs.shape[0] + 1
29 |
30 | wrapped_obs = wrapped_env.reset()
31 | assert wrapped_env.t == 0.0
32 | assert wrapped_obs[-1] == 0.0
33 | assert wrapped_obs.shape[0] == obs.shape[0] + 1
34 |
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/test_transform_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import TransformObservation
7 |
8 |
9 | @pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
10 | def test_transform_observation(env_id):
11 | affine_transform = lambda x: 3 * x + 2
12 | env = gym.make(env_id)
13 | wrapped_env = TransformObservation(
14 | gym.make(env_id), lambda obs: affine_transform(obs)
15 | )
16 |
17 | env.seed(0)
18 | wrapped_env.seed(0)
19 |
20 | obs = env.reset()
21 | wrapped_obs = wrapped_env.reset()
22 | assert np.allclose(wrapped_obs, affine_transform(obs))
23 |
24 | action = env.action_space.sample()
25 | obs, reward, done, _ = env.step(action)
26 | wrapped_obs, wrapped_reward, wrapped_done, _ = wrapped_env.step(action)
27 | assert np.allclose(wrapped_obs, affine_transform(obs))
28 | assert np.allclose(wrapped_reward, reward)
29 | assert wrapped_done == done
30 |
--------------------------------------------------------------------------------
/envs/gym/tests/wrappers/test_transform_reward.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import TransformReward
7 |
8 |
9 | @pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
10 | def test_transform_reward(env_id):
11 | # use case #1: scale
12 | scales = [0.1, 200]
13 | for scale in scales:
14 | env = gym.make(env_id)
15 | wrapped_env = TransformReward(gym.make(env_id), lambda r: scale * r)
16 | action = env.action_space.sample()
17 |
18 | env.seed(0)
19 | env.reset()
20 | wrapped_env.seed(0)
21 | wrapped_env.reset()
22 |
23 | _, reward, _, _ = env.step(action)
24 | _, wrapped_reward, _, _ = wrapped_env.step(action)
25 |
26 | assert wrapped_reward == scale * reward
27 | del env, wrapped_env
28 |
29 | # use case #2: clip
30 | min_r = -0.0005
31 | max_r = 0.0002
32 | env = gym.make(env_id)
33 | wrapped_env = TransformReward(gym.make(env_id), lambda r: np.clip(r, min_r, max_r))
34 | action = env.action_space.sample()
35 |
36 | env.seed(0)
37 | env.reset()
38 | wrapped_env.seed(0)
39 | wrapped_env.reset()
40 |
41 | _, reward, _, _ = env.step(action)
42 | _, wrapped_reward, _, _ = wrapped_env.step(action)
43 |
44 | assert abs(wrapped_reward) < abs(reward)
45 | assert wrapped_reward == -0.0005 or wrapped_reward == 0.0002
46 | del env, wrapped_env
47 |
48 | # use case #3: sign
49 | env = gym.make(env_id)
50 | wrapped_env = TransformReward(gym.make(env_id), lambda r: np.sign(r))
51 |
52 | env.seed(0)
53 | env.reset()
54 | wrapped_env.seed(0)
55 | wrapped_env.reset()
56 |
57 | for _ in range(1000):
58 | action = env.action_space.sample()
59 | _, wrapped_reward, done, _ = wrapped_env.step(action)
60 | assert wrapped_reward in [-1.0, 0.0, 1.0]
61 | if done:
62 | break
63 | del env, wrapped_env
64 |
--------------------------------------------------------------------------------
/envs/mujoco-maze/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | # This is a basic workflow to help you get started with Actions
2 |
3 | name: CI
4 |
5 | # Controls when the action will run. Triggers the workflow on push or pull request
6 | # events but only for the main branch
7 | on:
8 | push:
9 | branches: [ main ]
10 | pull_request:
11 | branches: [ main ]
12 |
13 | jobs:
14 | ci:
15 | strategy:
16 | matrix:
17 | python-version: [3.6, 3.7, 3.8]
18 | runs-on: ubuntu-18.04
19 | env:
20 | LD_LIBRARY_PATH: /home/runner/.mujoco/mujoco200/bin
21 | steps:
22 | - uses: actions/checkout@v2
23 | - uses: actions/setup-python@v2
24 | with:
25 | python-version: ${{ matrix.python-version }}
26 | - name: Install Poetry
27 | run: pip install poetry
28 | - name: Install Mujoco Dependencies
29 | run: sudo apt update && sudo apt install libosmesa6-dev libglfw3 libglew-dev patchelf libgl1-mesa-glx -y
30 | - name: Install Mujoco
31 | run: |
32 | mkdir ~/.mujoco
33 | curl -sO https://www.roboti.us/download/mujoco200_linux.zip
34 | unzip mujoco200_linux.zip
35 | mv mujoco200_linux ~/.mujoco/mujoco200
36 | echo "${{ secrets.MUJOCO_KEY }}" > ~/.mujoco/mjkey.txt
37 | - name: Run tests
38 | run: |
39 | poetry install
40 | poetry run python -m pip freeze
41 | poetry run test
42 |
--------------------------------------------------------------------------------
/envs/mujoco-maze/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | **/__pycache__/
3 | **/.pytest_cache/
4 | **/dist/
5 | **/build/
6 | **/*.egg-info/
7 | **/.mypy_cache/
8 |
--------------------------------------------------------------------------------
/envs/mujoco-maze/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include mujoco_maze/assets/*
2 |
--------------------------------------------------------------------------------
/envs/mujoco-maze/mujoco_maze/agent_model.py:
--------------------------------------------------------------------------------
1 | """Common APIs for defining mujoco robot.
2 | """
3 | from abc import ABC, abstractmethod
4 | from typing import Optional
5 |
6 | import numpy as np
7 | from gym.envs.mujoco.mujoco_env import MujocoEnv
8 | from gym.utils import EzPickle
9 |
10 |
11 | class AgentModel(ABC, MujocoEnv, EzPickle):
12 | FILE: str
13 | MANUAL_COLLISION: bool
14 | ORI_IND: Optional[int] = None
15 | RADIUS: Optional[float] = None
16 | OBJBALL_TYPE: Optional[str] = None
17 |
18 | def __init__(self, file_path: str, frame_skip: int) -> None:
19 | MujocoEnv.__init__(self, file_path, frame_skip)
20 | EzPickle.__init__(self)
21 |
22 | def close(self):
23 | if self.viewer is not None and hasattr(self.viewer, "window"):
24 | import glfw
25 |
26 | glfw.destroy_window(self.viewer.window)
27 | super().close()
28 |
29 | @abstractmethod
30 | def _get_obs(self) -> np.ndarray:
31 | """Returns the observation from the model."""
32 | pass
33 |
34 | def get_xy(self) -> np.ndarray:
35 | """Returns the coordinate of the agent."""
36 | pass
37 |
38 | def set_xy(self, xy: np.ndarray) -> None:
39 | """Set the coordinate of the agent."""
40 | pass
41 |
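
To illustrate how this interface is meant to be filled in, here is a minimal hypothetical subclass; ball.xml, the 0.3 radius, and the 6-dimensional observation are placeholders rather than assets shipped with mujoco-maze, and the pattern simply mirrors the point robot defined further below:

import numpy as np

from mujoco_maze.agent_model import AgentModel


class BallEnv(AgentModel):
    FILE: str = "ball.xml"           # hypothetical asset name
    MANUAL_COLLISION: bool = True    # let the maze env resolve wall collisions
    RADIUS: float = 0.3

    def __init__(self, file_path=None):
        super().__init__(file_path, 1)

    def _get_obs(self) -> np.ndarray:
        # Planar pose plus velocities, like the point robot.
        return np.concatenate(
            [self.sim.data.qpos.flat[:3], self.sim.data.qvel.flat[:3]]
        )

    def get_xy(self) -> np.ndarray:
        return self.sim.data.qpos[:2].copy()

    def set_xy(self, xy: np.ndarray) -> None:
        qpos = self.sim.data.qpos.copy()
        qpos[:2] = xy
        self.set_state(qpos, self.sim.data.qvel)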
--------------------------------------------------------------------------------
/envs/mujoco-maze/mujoco_maze/assets/point.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/mujoco_maze/assets/point.xml
--------------------------------------------------------------------------------
/envs/mujoco-maze/mujoco_maze/assets/reacher.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/mujoco_maze/assets/reacher.xml
--------------------------------------------------------------------------------
/envs/mujoco-maze/mujoco_maze/assets/swimmer.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/mujoco_maze/assets/swimmer.xml
--------------------------------------------------------------------------------
/envs/mujoco-maze/mujoco_maze/point.py:
--------------------------------------------------------------------------------
1 | """
2 | A ball-like robot as an explorer in the maze.
3 | Based on `models`_ and `rllab`_.
4 |
5 | .. _models: https://github.com/tensorflow/models/tree/master/research/efficient-hrl
6 | .. _rllab: https://github.com/rll/rllab
7 | """
8 |
9 | from typing import Optional, Tuple
10 |
11 | import gym
12 | import numpy as np
13 |
14 | from mujoco_maze.agent_model import AgentModel
15 |
16 |
17 | class PointEnv(AgentModel):
18 | FILE: str = "point.xml"
19 | ORI_IND: int = 2
20 | MANUAL_COLLISION: bool = True
21 | RADIUS: float = 0.4
22 | OBJBALL_TYPE: str = "hinge"
23 |
24 | VELOCITY_LIMITS: float = 10.0
25 |
26 | def __init__(self, file_path: Optional[str] = None) -> None:
27 | super().__init__(file_path, 1)
28 | high = np.inf * np.ones(6, dtype=np.float32)
29 | high[3:] = self.VELOCITY_LIMITS * 1.2
30 | high[self.ORI_IND] = np.pi
31 | low = -high
32 | self.observation_space = gym.spaces.Box(low, high)
33 |
34 | def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
35 | qpos = self.sim.data.qpos.copy()
36 | qpos[2] += action[1]
37 | # Wrap orientation into [-pi, pi]
38 | if qpos[2] < -np.pi:
39 | qpos[2] += np.pi * 2
40 | elif np.pi < qpos[2]:
41 | qpos[2] -= np.pi * 2
42 | ori = qpos[2]
43 | # Compute increment in each direction
44 | qpos[0] += np.cos(ori) * action[0]
45 | qpos[1] += np.sin(ori) * action[0]
46 | qvel = np.clip(self.sim.data.qvel, -self.VELOCITY_LIMITS, self.VELOCITY_LIMITS)
47 | self.set_state(qpos, qvel)
48 | for _ in range(0, self.frame_skip):
49 | self.sim.step()
50 | next_obs = self._get_obs()
51 | return next_obs, 0.0, False, {}
52 |
53 | def _get_obs(self):
54 | return np.concatenate(
55 | [
56 | self.sim.data.qpos.flat[:3], # Only point-relevant coords.
57 | self.sim.data.qvel.flat[:3],
58 | ]
59 | )
60 |
61 | def reset_model(self):
62 | qpos = self.init_qpos # + self.np_random.uniform(
63 | # size=self.sim.model.nq, low=-0.1, high=0.1
64 | # )
65 | qvel = self.init_qvel # + self.np_random.randn(self.sim.model.nv) * 0.1
66 |
67 | # Set everything other than point to original position and 0 velocity.
68 | qpos[3:] = self.init_qpos[3:]
69 | qvel[3:] = 0.0
70 | self.set_state(qpos, qvel)
71 | return self._get_obs()
72 |
73 | def get_xy(self):
74 | return self.sim.data.qpos[:2].copy()
75 |
76 | def set_xy(self, xy: np.ndarray) -> None:
77 | qpos = self.sim.data.qpos.copy()
78 | qpos[:2] = xy
79 | self.set_state(qpos, self.sim.data.qvel)
80 |
81 | def get_ori(self):
82 | return self.sim.data.qpos[self.ORI_IND]
83 |
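
The notable part of step() is the hand-rolled kinematic update: action[1] turns the ball, the heading is wrapped back into [-pi, pi], and action[0] moves the ball along that heading before MuJoCo integrates the velocities. A MuJoCo-free sketch of just that arithmetic (the function name point_kinematics is ours):

import numpy as np


def point_kinematics(qpos, action):
    qpos = qpos.copy()
    qpos[2] += action[1]                # turn
    # Wrap the heading into [-pi, pi].
    if qpos[2] < -np.pi:
        qpos[2] += 2 * np.pi
    elif qpos[2] > np.pi:
        qpos[2] -= 2 * np.pi
    ori = qpos[2]
    qpos[0] += np.cos(ori) * action[0]  # move along the current heading
    qpos[1] += np.sin(ori) * action[0]
    return qpos


# Facing +x and pushing forward by 1 changes only the x coordinate:
print(point_kinematics(np.zeros(3), np.array([1.0, 0.0])))  # [1. 0. 0.]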
--------------------------------------------------------------------------------
/envs/mujoco-maze/mujoco_maze/reacher.py:
--------------------------------------------------------------------------------
1 | """
2 | Based on the reacher in `dm_control`_.
3 |
4 | .. _dm_control: https://github.com/deepmind/dm_control
5 | """
6 |
7 | from typing import Optional, Tuple
8 |
9 | import numpy as np
10 |
11 | from mujoco_maze.agent_model import AgentModel
12 | from mujoco_maze.ant import ForwardRewardFn, forward_reward_vnorm
13 |
14 |
15 | class ReacherEnv(AgentModel):
16 | FILE: str = "reacher.xml"
17 | MANUAL_COLLISION: bool = False
18 |
19 | def __init__(
20 | self,
21 | file_path: Optional[str] = None,
22 | forward_reward_weight: float = 1.0,
23 | ctrl_cost_weight: float = 1e-4,
24 | forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
25 | ) -> None:
26 | self._forward_reward_weight = forward_reward_weight
27 | self._ctrl_cost_weight = ctrl_cost_weight
28 | self._forward_reward_fn = forward_reward_fn
29 | super().__init__(file_path, 4)
30 |
31 | def _forward_reward(self, xy_pos_before: np.ndarray) -> float:
32 | xy_pos_after = self.sim.data.qpos[:2].copy()
33 | xy_velocity = (xy_pos_after - xy_pos_before) / self.dt
34 | return self._forward_reward_fn(xy_velocity)
35 |
36 | def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
37 | xy_pos_before = self.sim.data.qpos[:2].copy()
38 | self.do_simulation(action, self.frame_skip)
39 |
40 | forward_reward = self._forward_reward(xy_pos_before)
41 | ctrl_cost = self._ctrl_cost_weight * np.sum(np.square(action))
42 | return (
43 | self._get_obs(),
44 | self._forward_reward_weight * forward_reward - ctrl_cost,
45 | False,
46 | dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost),
47 | )
48 |
49 | def _get_obs(self) -> np.ndarray:
50 | position = self.sim.data.qpos.flat.copy()
51 | velocity = self.sim.data.qvel.flat.copy()
52 | observation = np.concatenate([position, velocity]).ravel()
53 | return observation
54 |
55 | def reset_model(self) -> np.ndarray:
56 | qpos = self.init_qpos + self.np_random.uniform(
57 | low=-0.1,
58 | high=0.1,
59 | size=self.model.nq,
60 | )
61 | qvel = self.init_qvel + self.np_random.uniform(
62 | low=-0.1,
63 | high=0.1,
64 | size=self.model.nv,
65 | )
66 |
67 | self.set_state(qpos, qvel)
68 | return self._get_obs()
69 |
70 | def set_xy(self, xy: np.ndarray) -> None:
71 | qpos = self.sim.data.qpos.copy()
72 | qpos[:2] = xy
73 | self.set_state(qpos, self.sim.data.qvel)
74 |
75 | def get_xy(self) -> np.ndarray:
76 | return np.copy(self.sim.data.qpos[:2])
77 |
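
For reference, the reward assembled in step() is a weighted speed bonus minus a quadratic control cost. A small self-contained sketch, under the assumption (suggested by its name) that forward_reward_vnorm from mujoco_maze.ant returns the Euclidean norm of the planar velocity:

import numpy as np


def locomotion_reward(xy_before, xy_after, dt, action,
                      forward_reward_weight=1.0, ctrl_cost_weight=1e-4):
    # Forward term: assumed norm of the planar velocity over one step.
    xy_velocity = (np.asarray(xy_after) - np.asarray(xy_before)) / dt
    forward_reward = np.linalg.norm(xy_velocity)
    # Control cost: quadratic penalty on the applied torques.
    ctrl_cost = ctrl_cost_weight * np.sum(np.square(action))
    return forward_reward_weight * forward_reward - ctrl_cost


# Moving 0.1 units along x over dt=0.05 with a small action gives roughly 2.0:
print(locomotion_reward([0.0, 0.0], [0.1, 0.0], 0.05, np.array([0.2, 0.1])))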
--------------------------------------------------------------------------------
/envs/mujoco-maze/mujoco_maze/swimmer.py:
--------------------------------------------------------------------------------
1 | """
2 | Swimmer robot as an explorer in the maze.
3 | Based on `gym`_ (swimmer-v3).
4 |
5 | .. _gym: https://github.com/openai/gym
6 | """
7 |
8 | from typing import Optional, Tuple
9 |
10 | import numpy as np
11 |
12 | from mujoco_maze.agent_model import AgentModel
13 | from mujoco_maze.ant import ForwardRewardFn, forward_reward_vnorm
14 |
15 |
16 | class SwimmerEnv(AgentModel):
17 | FILE: str = "swimmer.xml"
18 | MANUAL_COLLISION: bool = False
19 |
20 | def __init__(
21 | self,
22 | file_path: Optional[str] = None,
23 | forward_reward_weight: float = 1.0,
24 | ctrl_cost_weight: float = 1e-4,
25 | forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
26 | ) -> None:
27 | self._forward_reward_weight = forward_reward_weight
28 | self._ctrl_cost_weight = ctrl_cost_weight
29 | self._forward_reward_fn = forward_reward_fn
30 | super().__init__(file_path, 4)
31 |
32 | def _forward_reward(self, xy_pos_before: np.ndarray) -> float:
33 | xy_pos_after = self.sim.data.qpos[:2].copy()
34 | xy_velocity = (xy_pos_after - xy_pos_before) / self.dt
35 | return self._forward_reward_fn(xy_velocity)
36 |
37 | def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
38 | xy_pos_before = self.sim.data.qpos[:2].copy()
39 | self.do_simulation(action, self.frame_skip)
40 | forward_reward = self._forward_reward(xy_pos_before)
41 | ctrl_cost = self._ctrl_cost_weight * np.sum(np.square(action))
42 | return (
43 | self._get_obs(),
44 | self._forward_reward_weight * forward_reward - ctrl_cost,
45 | False,
46 | dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost),
47 | )
48 |
49 | def _get_obs(self) -> np.ndarray:
50 | position = self.sim.data.qpos.flat.copy()
51 | velocity = self.sim.data.qvel.flat.copy()
52 | observation = np.concatenate([position, velocity]).ravel()
53 | return observation
54 |
55 | def reset_model(self) -> np.ndarray:
56 | qpos = self.init_qpos + self.np_random.uniform(
57 | low=-0.1,
58 | high=0.1,
59 | size=self.model.nq,
60 | )
61 | qvel = self.init_qvel + self.np_random.uniform(
62 | low=-0.1,
63 | high=0.1,
64 | size=self.model.nv,
65 | )
66 |
67 | self.set_state(qpos, qvel)
68 | return self._get_obs()
69 |
70 | def set_xy(self, xy: np.ndarray) -> None:
71 | qpos = self.sim.data.qpos.copy()
72 | qpos[:2] = xy
73 | self.set_state(qpos, self.sim.data.qvel)
74 |
75 | def get_xy(self) -> np.ndarray:
76 | return np.copy(self.sim.data.qpos[:2])
77 |
--------------------------------------------------------------------------------
/envs/mujoco-maze/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "mujoco-maze"
3 | version = "0.2.0"
4 | description = "Simple maze environments using mujoco-py"
5 | license = "Apache-2.0"
6 | authors = ["Yuji Kanagawa <yuji.kngw.80s.revive@gmail.com>"]
7 | readme = "README.md" # Markdown files are supported
8 |
9 | repository = "https://github.com/kngwyu/mujoco-maze"
10 | homepage = "https://github.com/kngwyu/mujoco-maze"
11 |
12 | [tool.poetry.dependencies]
13 | python = ">=3.6" # Compatible python versions must be declared here
14 | gym = ">=0.16"
15 | mujoco-py = ">=1.5"
16 |
17 | [tool.poetry.dev-dependencies]
18 | pytest = "^6.2"
19 |
20 | [tool.poetry.scripts]
21 | test = "pytest:main"
22 |
23 | [tool.black]
24 | line-length = 88
25 | target-version = ['py36']
26 | include = '\.pyi?$'
27 | exclude = '''
28 |
29 | (
30 | /(
31 | \.eggs
32 | | \.git
33 | | \.mypy_cache
34 | | _build
35 | | buck-out
36 | | build
37 | | dist
38 | )/
39 | )
40 | '''
--------------------------------------------------------------------------------
/envs/mujoco-maze/screenshots/AntFall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/AntFall.png
--------------------------------------------------------------------------------
/envs/mujoco-maze/screenshots/AntPush.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/AntPush.png
--------------------------------------------------------------------------------
/envs/mujoco-maze/screenshots/Point4Rooms.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/Point4Rooms.png
--------------------------------------------------------------------------------
/envs/mujoco-maze/screenshots/PointBilliard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/PointBilliard.png
--------------------------------------------------------------------------------
/envs/mujoco-maze/screenshots/PointCorridor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/PointCorridor.png
--------------------------------------------------------------------------------
/envs/mujoco-maze/screenshots/PointUMaze.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/PointUMaze.png
--------------------------------------------------------------------------------
/envs/mujoco-maze/screenshots/SwimmerSquareRoom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/SwimmerSquareRoom.png
--------------------------------------------------------------------------------
/envs/mujoco-maze/screenshots/WebVis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/envs/mujoco-maze/screenshots/WebVis.png
--------------------------------------------------------------------------------
/envs/mujoco-maze/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 88
3 | ignore = W391, W503, F821, E203, E231
4 |
5 | [mypy]
6 | python_version = 3.6
7 | ignore_missing_imports = True
8 | warn_unused_configs = True
9 |
10 | [isort]
11 | multi_line_output=3
12 | include_trailing_comma=True
13 | force_grid_wrap=0
14 | use_parentheses=True
15 | line_length=88
16 | lines_between_types=0
17 | combine_as_imports=True
18 | known_first_party=mujoco_maze, tests
19 |
--------------------------------------------------------------------------------
/envs/mujoco-maze/setup.py:
--------------------------------------------------------------------------------
1 | import io
2 | import os
3 | import re
4 |
5 | from setuptools import find_packages, setup
6 |
7 | NAME = "mujoco-maze"
8 | AUTHOR = "Yuji Kanagawa"
9 | EMAIL = "yuji.kngw.80s.revive@gmail.com"
10 | URL = "https://github.com/kngwyu/mujoco-maze"
11 | REQUIRES_PYTHON = ">=3.6.0"
12 | DESCRIPTION = "Simple maze environments using mujoco-py"
13 |
14 | here = os.path.abspath(os.path.dirname(__file__))
15 |
16 | with io.open(os.path.join(here, "mujoco_maze/__init__.py"), "rt", encoding="utf8") as f:
17 | VERSION = re.search(r"__version__ = \"(.*?)\"", f.read()).group(1)
18 |
19 | try:
20 | with io.open(os.path.join(here, "README.md"), encoding="utf-8") as f:
21 | LONG_DESCRIPTION = "\n" + f.read()
22 | except FileNotFoundError:
23 | LONG_DESCRIPTION = DESCRIPTION
24 |
25 |
26 | REQUIRED = ["gym>=0.16.0", "mujoco-py>=1.5.0"]
27 |
28 |
29 | setup(
30 | name=NAME,
31 | version=VERSION,
32 | url=URL,
33 | project_urls={
34 | "Code": URL,
35 | "Issue tracker": URL + "/issues",
36 | },
37 | author=AUTHOR,
38 | author_email=EMAIL,
39 | description=DESCRIPTION,
40 | long_description=LONG_DESCRIPTION,
41 | long_description_content_type="text/markdown",
42 | packages=find_packages(),
43 | python_requires=REQUIRES_PYTHON,
44 | install_requires=REQUIRED,
45 | license="Apache2",
46 | classifiers=[
47 | "License :: OSI Approved :: Apache Software License",
48 | "Development Status :: 3 - Alpha",
49 | "Intended Audience :: Science/Research",
50 | "Natural Language :: English",
51 | "Operating System :: OS Independent",
52 | "Programming Language :: Python",
53 | "Programming Language :: Python :: 3",
54 | "Programming Language :: Python :: 3.6",
55 | "Programming Language :: Python :: 3.7",
56 | "Programming Language :: Python :: 3.8",
57 | "Programming Language :: Python :: Implementation :: CPython",
58 | ],
59 | )
60 |
--------------------------------------------------------------------------------
/envs/mujoco-maze/tests/test_intersect.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | from mujoco_maze.maze_env_utils import Line
5 |
6 |
7 | @pytest.mark.parametrize(
8 | "l1, l2, p, ans",
9 | [
10 | ((0.0, 0.0), (4.0, 4.0), (1.0, 3.0), 2.0 ** 0.5),
11 | ((-3.0, -3.0), (0.0, 1.0), (-3.0, 1.0), 2.4),
12 | ],
13 | )
14 | def test_distance(l1, l2, p, ans):
15 | line = Line(l1, l2)
16 | point = complex(*p)
17 | assert abs(line.distance(point) - ans) <= 1e-8
18 |
19 |
20 | @pytest.mark.parametrize(
21 | "l1p1, l1p2, l2p1, l2p2, none",
22 | [
23 | ((0.0, 0.0), (1.0, 0.0), (0.0, -1.0), (1.0, 1.0), False),
24 | ((1.0, 1.0), (2.0, 3.0), (-1.0, 1.5), (1.5, 1.0), False),
25 | ((1.5, 1.5), (2.0, 3.0), (-1.0, 1.5), (1.5, 1.0), True),
26 | ((0.0, 0.0), (2.0, 0.0), (1.0, 0.0), (1.0, 3.0), False),
27 | ],
28 | )
29 | def test_intersect(l1p1, l1p2, l2p1, l2p2, none):
30 | l1 = Line(l1p1, l1p2)
31 | l2 = Line(l2p1, l2p2)
32 | i1 = l1.intersect(l2)
33 | i2 = line_intersect(l1p1, l1p2, l2p1, l2p2)
34 | if none:
35 | assert i1 is None and i2 is None
36 | else:
37 | assert i1 is not None
38 | i1 = np.array([i1.real, i1.imag])
39 | np.testing.assert_array_almost_equal(i1, np.array(i2))
40 |
41 |
42 | def line_intersect(pt1, pt2, ptA, ptB):
43 | """
44 | Taken from https://www.cs.hmc.edu/ACM/lectures/intersections.html
45 | Returns the intersection of Line(pt1,pt2) and Line(ptA,ptB).
46 | """
47 | import math
48 |
49 | DET_TOLERANCE = 0.00000001
50 |
51 | # the first line is pt1 + r*(pt2-pt1)
52 | # in component form:
53 | x1, y1 = pt1
54 | x2, y2 = pt2
55 | dx1 = x2 - x1
56 | dy1 = y2 - y1
57 |
58 | # the second line is ptA + s*(ptB-ptA)
59 | x, y = ptA
60 | xB, yB = ptB
61 | dx = xB - x
62 | dy = yB - y
63 |
64 | DET = -dx1 * dy + dy1 * dx
65 |
66 | if math.fabs(DET) < DET_TOLERANCE:
67 | return None
68 |
69 | # now, the determinant should be OK
70 | DETinv = 1.0 / DET
71 |
72 | # find the scalar amount along the "self" segment
73 | r = DETinv * (-dy * (x - x1) + dx * (y - y1))
74 |
75 | # find the scalar amount along the input line
76 | s = DETinv * (-dy1 * (x - x1) + dx1 * (y - y1))
77 |
78 | # return the average of the two descriptions
79 | xi = (x1 + r * dx1 + x + s * dx) / 2.0
80 | yi = (y1 + r * dy1 + y + s * dy) / 2.0
81 | if r >= 0 and 0 <= s <= 1:
82 | return xi, yi
83 | else:
84 | return None
85 |
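
The expected distances in the first parametrized test can be checked by hand with the usual determinant formula for point-to-line distance; a standalone verification that needs neither mujoco_maze nor MuJoCo:

import math


def point_line_distance(p1, p2, q):
    # |det([p2 - p1, q - p1])| / |p2 - p1|: distance from q to the line through p1 and p2.
    (x1, y1), (x2, y2), (qx, qy) = p1, p2, q
    det = (x2 - x1) * (qy - y1) - (y2 - y1) * (qx - x1)
    return abs(det) / math.hypot(x2 - x1, y2 - y1)


print(point_line_distance((0.0, 0.0), (4.0, 4.0), (1.0, 3.0)))     # sqrt(2) ~= 1.4142
print(point_line_distance((-3.0, -3.0), (0.0, 1.0), (-3.0, 1.0)))  # 2.4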
--------------------------------------------------------------------------------
/helpers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeideHuang/gradient/5c444ad522942527ec50cf886b1fdfcc38e5838c/helpers/__init__.py
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==1.12.1
2 | torch-tb-profiler==0.3.1
3 | tensorboard==2.8.0
4 | tensorboard-data-server==0.6.1
5 | tensorboard-plugin-wit==1.8.1
6 | sklearn==0.0
7 | scikit-image==0.15.0
8 | scikit-learn==1.0.2
9 | scipy==1.8.0
10 | tqdm==4.63.0
11 | numpy==1.22.2
12 | mujoco-py==2.1.2.14
13 | matplotlib==3.5.1
14 | gpytorch==1.9.0
15 | protobuf==3.19.4
16 | stable_baselines3==1.4.1a1
17 | POT==0.9.0
18 | Cython==0.29.28
19 | setuptools==58.0.4
--------------------------------------------------------------------------------