├── .gitignore
├── LICENSE
├── README.md
├── assets
│   ├── Falcon.png
│   └── Mcgill.png
├── docs
│   ├── 2d_navigation_envs.md
│   ├── _config.yml
│   ├── benchmark_algos
│   │   └── trpo.md
│   ├── benchmarks.md
│   ├── discrete_envs.md
│   ├── falcon_rocket.md
│   ├── index.md
│   ├── mujoco_gym_envs.md
│   └── wrappers.md
├── examples
│   ├── EnvironmentGeneration.ipynb
│   ├── image_based_navigation_example.py
│   ├── manual_control_example.py
│   ├── range_based_navigation_example.py
│   └── state_based_navigation_example.py
├── gym_extensions
│   ├── __init__.py
│   ├── continuous
│   │   ├── __init__.py
│   │   ├── box2d
│   │   │   ├── __init__.py
│   │   │   └── falcon_drone_ship_lander.py
│   │   ├── gym_navigation_2d
│   │   │   ├── __init__.py
│   │   │   ├── assets
│   │   │   │   └── worlds_640x480_v0.pkl
│   │   │   ├── env_generator.py
│   │   │   ├── env_utils.py
│   │   │   ├── geometry_utils.py
│   │   │   ├── gym_rendering.py
│   │   │   ├── image_based_navigation.py
│   │   │   ├── range_based_navigation.py
│   │   │   ├── rrt.py
│   │   │   └── test_env_generator.py
│   │   └── mujoco
│   │       ├── __init__.py
│   │       ├── gravity_envs.py
│   │       ├── modified_ant.py
│   │       ├── modified_arm.py
│   │       ├── modified_half_cheetah.py
│   │       ├── modified_hopper.py
│   │       ├── modified_humanoid.py
│   │       ├── modified_walker2d.py
│   │       ├── perturbed_bodypart_env.py
│   │       └── wall_envs.py
│   ├── discrete
│   │   ├── __init__.py
│   │   └── classic
│   │       ├── __init__.py
│   │       ├── cartpole.py
│   │       ├── cartpole_contextual.py
│   │       └── pendulum_contextual.py
│   ├── tests
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── test_contextual_envs.py
│   │   ├── test_import.py
│   │   ├── test_mujoco_import.py
│   │   └── test_wrappers.py
│   └── wrappers
│       ├── __init__.py
│       ├── normalized_env.py
│       ├── observation_transform_wrapper.py
│       ├── proxy_env.py
│       ├── serializable.py
│       └── transformers.py
├── multitask_benchmarks
│   ├── __init__.py
│   ├── commands_run
│   ├── run_trpo.py
│   └── utils.py
├── requirements.txt
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # swp
71 | # *.swp
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # dotenv
85 | .env
86 |
87 | # virtualenv
88 | .venv
89 | venv/
90 | ENV/
91 |
92 | # Spyder project settings
93 | .spyderproject
94 | .spyproject
95 |
96 | # Rope project settings
97 | .ropeproject
98 |
99 | # mkdocs documentation
100 | /site
101 |
102 | # mypy
103 | .mypy_cache/
104 |
105 | data/
106 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Peter Henderson
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 | # OpenAI gym
24 | This work is derived from [OpenAI Gym](https://github.com/openai/gym) used under the MIT License.
25 |
26 | # Mujoco models
27 | This work is derived from [MuJoCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license:
28 | ```
29 | This file is part of MuJoCo.
30 | Copyright 2009-2015 Roboti LLC.
31 | Mujoco :: Advanced physics simulation engine
32 | Source : www.roboti.us
33 | Version : 1.31
34 | Released : 23Apr16
35 | Author :: Vikash Kumar
36 | Contacts : kumar@roboti.us
37 | ```
38 |
39 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # gym-extensions
4 | This Python package extends OpenAI Gym with environments for auxiliary tasks (multitask learning, transfer learning, inverse reinforcement learning, etc.).
5 |
6 |
7 | ## Dependencies
8 |
9 | - [Python 3.5.2](https://www.python.org/)
10 | - [OpenAI Gym](https://gym.openai.com/)
11 | - [MuJoCo](http://mujoco.org/) (Optional)
12 | - [mujoco-py](https://github.com/openai/mujoco-py#install-mujoco) (Optional)
13 | - [roboschool](https://github.com/openai/roboschool) (Optional)
14 |
15 | ## Installation
16 |
17 | Check out the latest code:
18 | ```bash
19 | git clone https://github.com/Breakend/gym-extensions-multitask.git
20 | ```
21 |
22 | ### Install without MuJoCo Support
23 | ```bash
24 | pip3 install -e .
25 | ```
26 |
27 | ### Install with MuJoCo Support
28 | Install MuJoCo according to [mujoco-py](https://github.com/openai/mujoco-py#install-mujoco).
29 | 1. Obtain a license for [MuJoCo](http://mujoco.org/)
30 | 2. Download the MuJoCo 1.50 binaries
31 | 3. Unzip them into `~/.mujoco/mjpro150` and place your license key
32 | at `~/.mujoco/mjkey.txt`
33 | 4. Finally, install gym-extensions with mujoco-py enabled:
34 |
35 | ```bash
36 | pip3 install -e .[mujoco]
37 | ```
38 |
39 | ### Test Installation
40 | ```bash
41 | nosetests -v gym_extensions
42 | ```
43 |
44 | ### Possible Issues
45 |
46 | Due to the dependency on OpenAI Gym, you may have some trouble installing gym on macOS. To remedy this:
47 |
48 | ```bash
49 | # as per: https://github.com/openai/gym/issues/164
50 | export MACOSX_DEPLOYMENT_TARGET=10.12; pip install -e .
51 | ```
52 |
53 | Also, if you get the following error:
54 | ```
55 | >>> import matplotlib.pyplot
56 | Traceback (most recent call last):
57 | File "", line 1, in
58 | File "~/anaconda2/lib/python2.7/site-packages/matplotlib/pyplot.py", line 115, in
59 | _backend_mod, new_figure_manager, draw_if_interactive, _show = pylab_setup()
60 | File "~/anaconda2/lib/python2.7/site-packages/matplotlib/backends/__init__.py", line 32, in pylab_setup
61 | globals(),locals(),[backend_name],0)
62 | File "~/anaconda2/lib/python2.7/site-packages/matplotlib/backends/backend_gtk.py", line 19, in
63 | raise ImportError("Gtk* backend requires pygtk to be installed.")
64 | ImportError: Gtk* backend requires pygtk to be installed.
65 | ```
66 | the easiest fix is to switch backends for matplotlib. You can do this by setting `backend: TkAgg` in `~/.config/matplotlib/matplotlibrc` or `~/.matplotlib/matplotlibrc`.
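If you prefer not to edit the config file, a minimal sketch of the same fix done programmatically (standard matplotlib API, nothing specific to this repo):

```python
# Programmatic alternative to editing matplotlibrc: select the backend
# before pyplot is imported anywhere else in the process.
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
```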
67 |
68 | ## Usage
69 |
70 | For specific environments (you don't necessarily want to import the whole project)
71 |
72 | ```python
73 | from gym_extensions.continuous import gym_navigation_2d
74 | env = gym.make("State-Based-Navigation-2d-Map1-Goal1-v0")
75 | ```
76 |
77 | ```python
78 | from gym_extensions.continuous import mujoco
79 | env = gym.make("HopperWall-v0")
80 | ```
81 |
82 |
83 | ## More info
84 |
85 | More information will be provided on our doc website: https://breakend.github.io/gym-extensions/
86 |
87 | ## Contributions
88 |
89 | To contribute environments, please follow the general directory structure we have in place and submit **pull requests**. We're still working on making this extension to OpenAI Gym the best it can be, so things may change. Any change to an existing environment should increment the version in its name (e.g. Hopper-v0 vs. Hopper-v1). If you are not associated with McGill and contribute significantly, please add your affiliation to:
90 |
91 | docs/index.md
92 |
93 | ## References
94 |
95 | Some of this work borrowed ideas and code from OpenAI rllab and OpenAI Gym. We thank those creators for their work and cite links to reference code inline where possible.
96 |
97 | ## Contributors
98 |
99 | Here's a list of contributors!
100 |
101 | + @Breakend
102 | + @weidi-chang
103 | + @florianshkurti
104 | + @johannah
105 | + @vBarbaros
106 | + @mklissa
107 |
108 | ## Who's using this?
109 |
110 | Works that have used this framework include:
111 |
112 | ```
113 | Klissarov, Martin, Pierre-Luc Bacon, Jean Harb, and Doina Precup. "Learnings Options End-to-End for Continuous Action Tasks." arXiv preprint arXiv:1712.00004 (2017).
114 |
115 | Henderson, Peter, Wei-Di Chang, Pierre-Luc Bacon, David Meger, Joelle Pineau, and Doina Precup. "OptionGAN: Learning Joint Reward-Policy Options using Generative Adversarial Inverse Reinforcement Learning." arXiv preprint arXiv:1709.06683 (2017).
116 | ```
117 |
118 | ## Citation for this work!
119 |
120 | If you use this work, please use the following citation. If you use the SpaceX environment, please also credit @vBarbaros.
121 |
122 | ```
123 | @article{henderson2017multitask,
124 | author = {{Henderson}, P. and {Chang}, W.-D. and {Shkurti}, F. and {Hansen}, J. and
125 | {Meger}, D. and {Dudek}, G.},
126 | title = {Benchmark Environments for Multitask Learning in Continuous Domains},
127 | journal = {ICML Lifelong Learning: A Reinforcement Learning Approach Workshop},
128 | year={2017}
129 | }
130 | ```
131 |
--------------------------------------------------------------------------------
/assets/Falcon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/assets/Falcon.png
--------------------------------------------------------------------------------
/assets/Mcgill.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/assets/Mcgill.png
--------------------------------------------------------------------------------
/docs/2d_navigation_envs.md:
--------------------------------------------------------------------------------
1 | ## 2D continuous navigation envs
2 |
3 | We also provide several novel 2D environments that focus on navigation tasks with continuous action spaces to enable benchmarking of learning tasks requiring an implicit memory. The tasks take place in a given occupancy grid map. We opt to make the layout and shape of the obstacles the only disambiguating features for localizing within the map. Aside from that information, the environment does not have any texture mapping or other distinctive features.
4 | We provide three different types of navigation tasks, increasing in level of difficulty:
5 |
6 | + Image-based navigation where the agent has access to the entire map, including its own position within the map and the destination in the map as part of the image data.
7 | + State-based navigation, where the agent has access to its own position in the map and the distance and bearing to the closest obstacle. A simpler version also contains the destination coordinates.
8 | + Navigation based only on local range-and-bearing data around the agent, obtained via ray-tracing. The agent has to perform mapping and estimate its own position within the map (i.e. perform SLAM), while at the same time exploring to find the goal location and learning to avoid obstacles. We also provide a simpler version in which the goal and current position are known as well.
9 |
10 | We provide a reward of -1 for every timestep, -5 for obstacle collisions, and +10 for reaching the goal (which also ends the task, similarly to the MountainCar-v0 environment in OpenAI Gym). The action space is the bounded velocity to apply in the x and y directions.
11 |
12 | **Environments**
13 |
14 | + Learning to navigate and search in 2D environments observing only raytracing distance readouts (Limited-Range-Based-Navigation-2d-Map{0-9}-Goal{0-2}-v0)
15 | + Learning to navigate and search in 2D environments observing current position, raytracing distance readouts, and known goal position (Limited-Range-Based-Navigation-2d-Map{0-9}-Goal{0-2}-KnownPositions-v0)
16 | + Learning to navigate and search in 2D environments observing only the 2D map image with goal location and current position highlighted in different colors (Image-Based-Navigation-2d-Map{0-9}-Goal{0-2}-v0)
17 |
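A minimal sketch of using one of these environments (assuming `gym` and this package are installed; importing the `gym_navigation_2d` module registers the environment ids listed above):

```python
import gym
from gym_extensions.continuous import gym_navigation_2d  # registers the 2D navigation envs on import

env = gym.make("State-Based-Navigation-2d-Map0-Goal0-v0")
observation = env.reset()
for t in range(1000):
    # sample a random bounded (vx, vy) velocity command
    observation, reward, done, info = env.step(env.action_space.sample())
    if done:
        print("Episode finished after {} timesteps".format(t + 1))
        break
```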
--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman
2 | show_downloads: true
3 | markdown: kramdown
4 |
--------------------------------------------------------------------------------
/docs/benchmarks.md:
--------------------------------------------------------------------------------
1 | # Benchmarks!
2 |
3 | Part of our goal in releasing these environments is so that people can upload their algorithm benchmark results. In that spirit, we start off with some simple TRPO results. First, we define several task groupings which may be useful to run together. Then we provide some results from running TRPO sequentially across these groupings. While these aren't the only combinations, task groupings, and experiments that can be done with these environments, our results highlight some fundamental problems in lifelong and multitask learning (such as catastrophic forgetting, limited generalization, etc.).
4 |
5 | ## Possible Task Groupings
6 |
7 | ## Benchmark Results
8 |
9 | + [TRPO](benchmark_algos/trpo)
10 |
11 | ## Uploading Algorithm Results
12 |
13 | To upload your algorithms' results, please send a pull request with a doc page as in the *docs/benchmark_algos/trpo* folder.
14 |
15 | We require three sections:
16 |
17 | 1. Algorithm description and link to code (also include hyperparameters used)
18 | 2. Task Groupings and Experimental Setup (number of trials, sequential vs. simultaneous learning, etc.)
19 | 3. Results (Average Reward, Std. Deviation of reward, number of trials, etc. on each environment in the grouping)
20 |
--------------------------------------------------------------------------------
/docs/discrete_envs.md:
--------------------------------------------------------------------------------
1 |
2 | ## Discrete Envs
3 |
4 | We are adding support for discrete envs that are customizable for simple transfer learning tasks. An example is:
5 |
6 | ```python
7 | gym_extensions.discrete.classic.cartpole.register_custom_cartpole("CartpoleLongPole-v0", gravity=9.8, masscart=1.0, masspole=0.1, pole_length=1.0, force_mag=10.0)
8 | ```
9 |
10 | You can then instantiate it with `gym.make("CartpoleLongPole-v0")`.
11 |
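Putting both steps together, here is a minimal sketch (assuming `gym` and this package are installed; the module path follows the dotted call above):

```python
import gym
from gym_extensions.discrete.classic import cartpole

# register a long-pole variant, then use it like any other gym environment
cartpole.register_custom_cartpole("CartpoleLongPole-v0", gravity=9.8, masscart=1.0,
                                  masspole=0.1, pole_length=1.0, force_mag=10.0)
env = gym.make("CartpoleLongPole-v0")
observation = env.reset()
observation, reward, done, info = env.step(env.action_space.sample())
```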
12 | While we want to support these custom envs, the main goal of this project is to come up with a standard set of added tasks which can be used in the context of multitask or lifelong learning, particularly in continuous domains. To that end, we came up with the following.
13 |
--------------------------------------------------------------------------------
/docs/falcon_rocket.md:
--------------------------------------------------------------------------------
1 | ## SpaceX-like Falcon Rocket
2 |
3 | This Falcon rocket lander environment encourages landing on an autonomous floating drone ship. It uses the LunarLander environment from OpenAI Gym as a base and adds several elements that make the task more difficult and interesting.
4 |
5 | The state now contains 13 dimensions, some of which describe the drone ship. The difficulty of the task can be adjusted in many ways (e.g., giving the rocket's engines more or less power, or increasing/decreasing the force that pushes the drone ship around so that it oscillates from left to right with a different velocity).
6 |
7 | The reward function accounts for the Falcon rocket landing on both legs (otherwise it's not quite a landing) and for the angle and velocity of the drone ship (the faster and more unstable the drone ship, the higher the reward). An episode ends once the rocket has landed or if it goes below the drone-ship level. Longer episodes are also penalized, i.e., more time-steps until a successful landing results in a lower reward.
8 |
9 | You can land the SpaceX Falcon rocket from the keyboard as well.
10 |
11 | [](https://www.youtube.com/watch?v=GpHylnSox_Q&feature=youtu.be "Hope you'll like it!")
12 |
13 | Check the source code in `gym_extensions/continuous/box2d/falcon_drone_ship_lander.py`.
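A minimal sketch of running the environment with random actions (assuming Box2D and pygame are installed; importing the `box2d` module registers `FalconLander-v0`):

```python
import gym
from gym_extensions.continuous import box2d  # registers FalconLander-v0 on import

env = gym.make("FalconLander-v0")
observation = env.reset()
for t in range(1000):
    env.render()
    observation, reward, done, info = env.step(env.action_space.sample())
    if done:
        break
```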
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | Welcome to **gym-extensions**! We're building on top of OpenAI Gym in the pursuit of creating environments suitable for benchmarking a wide variety of auxiliary tasks. On the RoadMap right now are:
2 |
3 | + Multitask, Transfer, and LifeLong Learning Benchmarks
4 | + Learning to navigate and search tasks
5 | + More realistic robotics tasks integrated with ROS and Gazebo
6 |
7 | There's a lot of info to go over!
8 |
9 | + You can read about the modified Mujoco OpenAI continuous domain tasks [here](mujoco_gym_envs)
10 | + You can read about new 2D navigation continuous domain tasks [here](2d_navigation_envs)
11 | + We also provide a set of wrappers to process agent observations in tasks as described [here](wrappers)
12 | + Finally, you can enjoy a brand new SpaceX-like Falcon rocket that can be taught to land on an autonomous drone ship [here](falcon_rocket)
13 |
14 | We want all the envs and pull requests we can get! Open an issue if you're confused and we'll fix it ASAP.
15 |
16 | In progress documentation:
17 |
18 | + A website for benchmarked multitask runs
19 | + Pictures and videos
20 | + A nicer layout
21 | + How to create new environments and contribute
22 |
23 | ### Brought to you by
24 |
25 | {:width="30%" class="img-responsive"}
26 |
--------------------------------------------------------------------------------
/docs/mujoco_gym_envs.md:
--------------------------------------------------------------------------------
21 | ## Continuous Mujoco Modified OpenAI Gym Environments
22 |
23 | ### Modified Gravity
24 |
25 | For the running agents, we provide ready environments with various scales of simulated earth-like gravity, ranging from one half to one and a half of the normal gravity level (−4.91 to −12.26 $$m \cdot s^{-2}$$, in increments of $$0.25 \cdot g$$ of earth gravity). We propose that a successful multitask learning algorithm will extract the underlying walking action structure and reuse the applicable knowledge without forgetting how to walk in varying gravity conditions.
26 |
27 | **HopperGravityHalf-v0**
28 |
29 | The standard Mujoco OpenAI gym hopper task with gravity scaled by 0.5
30 |
31 | **HopperGravityThreeQuarters-v0**
32 |
33 | The standard Mujoco OpenAI gym hopper task with gravity scaled by 0.75
34 |
35 | **HopperGravityOneAndQuarter-v0**
36 |
37 | The standard Mujoco OpenAI gym hopper task with gravity scaled by 1.25
38 |
39 | **HopperGravityOneAndHalf-v0**
40 |
41 | The standard Mujoco OpenAI gym hopper task with gravity scaled by 1.5
42 |
43 | **Similarly:**
44 |
45 | + Walker2dGravityHalf-v0
46 | + Walker2dGravityThreeQuarters-v0
47 | + Walker2dGravityOneAndQuarter-v0
48 | + Walker2dGravityOneAndHalf-v0
49 | + HalfCheetahGravityHalf-v0
50 | + HalfCheetahGravityThreeQuarters-v0
51 | + HalfCheetahGravityOneAndQuarter-v0
52 | + HalfCheetahGravityOneAndHalf-v0
53 | + HumanoidGravityHalf-v0
54 | + HumanoidGravityThreeQuarters-v0
55 | + HumanoidGravityOneAndQuarter-v0
56 | + HumanoidGravityOneAndHalf-v0
57 |
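As a minimal sketch (assuming MuJoCo and `mujoco-py` are installed), any of the environments above can be created by name once the `mujoco` module has been imported:

```python
import gym
from gym_extensions.continuous import mujoco  # registers the modified MuJoCo envs on import

env = gym.make("HopperGravityHalf-v0")
observation = env.reset()
observation, reward, done, info = env.step(env.action_space.sample())
```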
58 | ### Humanoid Extended Tasks
59 |
60 | We provide a humanoid multitask environment which combines the rewards for standing up and running in a single environment. The reward scale for this task is rather large, but aligns with the HumanoidStandup-v1 environment from OpenAI Gym. Additionally we provide a version of each environment with a sensor readout. When no wall is used, all sensors read zero. When a wall is used, each returns a distance to the wall as previously described.
61 |
62 | **HumanoidStandupAndRun-v0**
63 |
64 | This task rewards standing up and running. Combines the reward functions from HumanoidStandup and Humanoid.
65 |
66 | ### Random Walls and Sensors
67 |
68 | Inspired by the wall jumping experiment in (Finn et al., 2016), we build a set of similar environments by extending the OpenAI running tasks to use a multi-beam noiseless range sensor.
69 |
70 | Ray-beams are emitted from the torso of the runner to act as sensor readings. We also provide the usual running tasks with no walls and the sensor perception enabled so that an agent can first be trained with the larger observation vector before being run or trained on the environments with a wall set in the path of the agent.
71 |
72 | **Walker2dWithSensor-v0**
73 |
74 | This task is identical to the Walker2d environment in OpenAI gym, but with empty sensor readouts (10 extra observations by default) which would detect a wall if it were there. This is to keep the agent's state space the same dimensionality for transfer learning to a Wall environment.
75 |
76 | **Walker2dWall-v0**
77 |
78 | In this task, the agent is equipped with a noiseless ray tracing 10 beam sensor in an arc of 90 degrees and a maximum sensing distance of 10 meters, with readouts normalized to a range of [0, 1] to make it a percentage of the overall maximum ray detection distance. A wall is randomly placed in the path of the agent, at a location drawn from a uniform distribution from 1.8 to 3.8 meters ahead of the agent’s start location.
79 |
80 | **Similarly:**
81 |
82 | + HopperWithSensor-v0
83 | + HopperWall-v0
84 | + HalfCheetahWithSensor-v0
85 | + HalfCheetahWall-v0
86 | + Walker2dWithSensor-v0
87 | + Walker2dWall-v0
88 | + HumanoidWithSensor-v0
89 | + HumanoidWall-v0
90 | + HumanoidStandupWithSensor-v0
91 | + HumanoidStandupAndRunWithSensor-v0
92 | + HumanoidStandupAndRunWall-v0
93 |
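Because the `WithSensor` variants pad the observation with the (empty) sensor readouts, a policy trained on them keeps the same input dimensionality when moved to the corresponding `Wall` task. A small sketch of that check (assuming MuJoCo support is installed):

```python
import gym
from gym_extensions.continuous import mujoco

sensor_env = gym.make("HopperWithSensor-v0")
wall_env = gym.make("HopperWall-v0")

# the observation spaces should match, so a policy trained on the sensor-only
# task can be transferred to the wall task without changing its input size
assert sensor_env.observation_space.shape == wall_env.observation_space.shape
```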
94 | ### Arm Environments
95 |
96 | In the OpenAI Striker and Pusher tasks, a 7 DoF arm tries to hit a ball into a hole or push a peg to a goal position respectively. We extend these tasks to randomly move the goal position for the Pusher task, and randomly move the ball start position for the Striker task. As in the original tasks, we bound the varied goal or start state within some restricted uniform distribution as domain appropriate.
97 |
98 | **StrikerMovingStart-v0**
99 |
100 | The Striker-v0 env from OpenAI gym, but with a moving starting position as well as goal.
101 |
102 | **PusherMovingGoal-v0**
103 |
104 | The Pusher-v0 env from OpenAI gym, but with a moving goal position as well as start position.
105 |
106 | ### Morphological Modifications
107 |
108 | For the running agents, we provide environments which vary the morphology of a specific body part of the agent. We define “Big” bodyparts as scaling the mass and width of the limb by 1.25 and “Small” bodyparts as being scaled by 0.75. We also group categories of limbs for environments with multiple appendages (i.e. humanoid torso includes the abdomen; humanoid thigh also includes the hips; all appendages encompass both the left/right or front/back simultaneously such that a modified thigh includes both thighs).
109 |
110 | **Envs**:
111 |
112 | + HopperSmallFoot-v0
113 | + HopperSmallLeg-v0
114 | + HopperSmallThigh-v0
115 | + HopperSmallTorso-v0
116 | + HopperBigFoot-v0
117 | + HopperBigLeg-v0
118 | + HopperBigThigh-v0
119 | + HopperBigTorso-v0
120 | + Walker2dSmallFoot-v0
121 | + Walker2dSmallLeg-v0
122 | + Walker2dSmallThigh-v0
123 | + Walker2dSmallTorso-v0
124 | + Walker2dBigFoot-v0
125 | + Walker2dBigLeg-v0
126 | + Walker2dBigThigh-v0
127 | + Walker2dBigTorso-v0
128 | + HalfCheetahSmallFoot-v0
129 | + HalfCheetahSmallLeg-v0
130 | + HalfCheetahSmallThigh-v0
131 | + HalfCheetahSmallTorso-v0
132 | + HalfCheetahBigFoot-v0
133 | + HalfCheetahBigLeg-v0
134 | + HalfCheetahBigThigh-v0
135 | + HalfCheetahBigTorso-v0
136 | + HumanoidSmallFoot-v0
137 | + HumanoidSmallLeg-v0
138 | + HumanoidSmallThigh-v0
139 | + HumanoidSmallTorso-v0
140 | + HumanoidBigFoot-v0
141 | + HumanoidBigLeg-v0
142 | + HumanoidBigThigh-v0
143 | + HumanoidBigTorso-v0
144 | + HumanoidSmallHead-v0
145 | + HumanoidBigHead-v0
146 | + HumanoidSmallArm-v0
147 | + HumanoidBigArm-v0
148 | + HumanoidSmallHand-v0
149 | + HumanoidBigHand-v0
150 |
--------------------------------------------------------------------------------
/docs/wrappers.md:
--------------------------------------------------------------------------------
1 | ## Wrappers
2 |
3 | We provide a set of wrappers based on rllab to wrap OpenAI Gym environments; these typically filter or transform observations.
4 |
5 | These wrappers require rllab as a dependency, but we're working on removing this dependency. Please submit a Pull Request if you want it done sooner!
6 |
7 | ### Current Wrappers
8 |
9 | We only provide one wrapper for now:
10 |
11 | **ObservationTransformWrapper** - Provides a list of transformers and runs all observations through the transformers.
12 |
13 | ### Current Transformers
14 |
15 | **SimpleNormalizePixelIntensitiesTransformer** - Normalizes pixels (divide by 255)
16 |
17 | **ResizeImageTransformer** - Resizes an image by a given percentage
18 |
19 | **RandomSensorMaskTransformer** - Randomly occludes some given percentage of the observations at every timestep
20 |
21 | ### Example Usage
22 |
23 | ```python
24 | import gym
25 | # import paths below are assumed from this repo's layout (gym_extensions/wrappers/)
26 | from gym_extensions.wrappers.observation_transform_wrapper import ObservationTransformWrapper
27 | from gym_extensions.wrappers.transformers import SimpleNormalizePixelIntensitiesTransformer, ResizeImageTransformer
28 |
29 | gymenv = gym.make("Hopper-v1")
30 | transformers = [SimpleNormalizePixelIntensitiesTransformer(), ResizeImageTransformer(fraction_of_current_size=.35)]
31 | transformed_env = ObservationTransformWrapper(gymenv, transformers)
32 | ```
33 |
--------------------------------------------------------------------------------
/examples/image_based_navigation_example.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 | import time
4 | from gym_extensions.continuous import gym_navigation_2d
5 | import matplotlib.pyplot as plt
6 |
7 | env = gym.make('Image-Based-Navigation-2d-Map0-Goal0-v0')
8 | observation = env.reset()
9 | img = plt.imshow(observation, interpolation="None", origin='lower')
10 |
11 | for t in range(500):
12 | env.render()
13 | action = env.action_space.sample()  # sampled but unused; we drive with a constant velocity below
14 | observation, reward, done, info = env.step(np.array([1.,1.]))
15 |
16 | if not t%5:
17 | img.set_data(observation)
18 | plt.pause(1e-15)
19 | if done:
20 | print("Episode finished after {} timesteps".format(t+1))
21 | break
22 |
--------------------------------------------------------------------------------
/examples/manual_control_example.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import gym.spaces
3 | import gym.utils
4 | import gym.utils.seeding
5 | import numpy as np
6 | from gym_extensions.continuous import mujoco
7 | import sys
8 | try:
9 | from gym.envs.mujoco.mujoco_env import MujocoEnv
10 | except ImportError:
11 | print("This example depends on mujoco, see README.md for information on installing.")
12 | sys.exit()
13 | try:
14 | import roboschool
15 | from roboschool.gym_mujoco_xml_env import RoboschoolMujocoXmlEnv
16 | except ImportError:
17 | print("""This example depends on roboschool. To install roboschool, see instructions at:
18 | https://github.com/openai/roboschool""")
19 | sys.exit()
20 |
21 | # From https://raw.githubusercontent.com/openai/roboschool/master/roboschool/test_manual.py
22 | # Run this file to test environments using manual control:
23 | # python test_manual.py RoboschoolHopper-v0
24 |
25 | class TestKeyboardControl:
26 | def __init__(self):
27 | self.keys = {}
28 | self.control = np.zeros(9)
29 | self.human_pause = False
30 | self.human_done = False
31 | def key(self, event_type, key, modifiers):
32 | self.keys[key] = +1 if event_type==6 else 0
33 | #print ("event_type", event_type, "key", key, "modifiers", modifiers)
34 | self.control[0] = self.keys.get(0x1000014, 0) - self.keys.get(0x1000012, 0)
35 | self.control[1] = self.keys.get(0x1000013, 0) - self.keys.get(0x1000015, 0)
36 | self.control[2] = self.keys.get(ord('A'), 0) - self.keys.get(ord('Z'), 0)
37 | self.control[3] = self.keys.get(ord('S'), 0) - self.keys.get(ord('X'), 0)
38 | self.control[4] = self.keys.get(ord('D'), 0) - self.keys.get(ord('C'), 0)
39 | self.control[5] = self.keys.get(ord('F'), 0) - self.keys.get(ord('V'), 0)
40 | self.control[6] = self.keys.get(ord('G'), 0) - self.keys.get(ord('B'), 0)
41 | self.control[7] = self.keys.get(ord('H'), 0) - self.keys.get(ord('N'), 0)
42 | self.control[8] = self.keys.get(ord('J'), 0) - self.keys.get(ord('M'), 0)
43 | if event_type==6 and key==32: # press Space to pause
44 | self.human_pause = 1 - self.human_pause
45 | if event_type==6 and key==0x1000004: # press Enter to restart
46 | self.human_done = True
47 |
48 |
49 | class TestKeyboardControlMuj:
50 | def __init__(self):
51 | self.keys = {}
52 | self.control = np.zeros(9)
53 | self.human_pause = False
54 | self.human_done = False
55 |
56 | def key(self, window, key, scancode, event_type, modifiers):
57 | self.keys[key] = +1 if event_type==1 else 0
58 | # print(key)
59 | #print ("event_type", event_type, "key", key, "modifiers", modifiers)
60 | self.control[0] = self.keys.get(265, 0) - self.keys.get(264, 0)
61 | self.control[1] = self.keys.get(262, 0) - self.keys.get(263, 0)
62 | self.control[2] = self.keys.get(ord('A'), 0) - self.keys.get(ord('Z'), 0)
63 | self.control[3] = self.keys.get(ord('S'), 0) - self.keys.get(ord('X'), 0)
64 | self.control[4] = self.keys.get(ord('D'), 0) - self.keys.get(ord('C'), 0)
65 | self.control[5] = self.keys.get(ord('F'), 0) - self.keys.get(ord('V'), 0)
66 | self.control[6] = self.keys.get(ord('G'), 0) - self.keys.get(ord('B'), 0)
67 | self.control[7] = self.keys.get(ord('H'), 0) - self.keys.get(ord('N'), 0)
68 | self.control[8] = self.keys.get(ord('J'), 0) - self.keys.get(ord('M'), 0)
69 | if event_type==1 and key==32: # press Space to pause
70 | self.human_pause = 1 - self.human_pause
71 | if event_type==1 and key==257: # press Enter to restart
72 | self.human_done = True
73 |
74 |
75 | usage = """
76 | This is a manual test. Usage:
77 | %s
78 |
79 | Keyboard shortcuts:
80 | * F1 toggle slow motion
81 | * F2 toggle captions
82 | * F3 toggle HUD: observations, actions, reward
83 | * ENTER to restart episode (works only in this test)
84 | * SPACE to pause (works only in this test)
85 | * Up/down, left/right, a/z, s/x, d/c, f/v, g/b, h/n, j/m to control robot (works only in this test)
86 | """
87 |
88 | def test(env_id):
89 | print(usage % sys.argv[0])
90 | env = gym.make(env_id)
91 | # import pdb; pdb.set_trace()
92 | env.reset() # This creates default single player scene
93 | if isinstance(env.unwrapped, MujocoEnv):
94 | ctrl = TestKeyboardControlMuj()
95 | from mujoco_py.glfw import set_key_callback
96 | set_key_callback(env.unwrapped._get_viewer().window, ctrl.key)
97 | elif isinstance(env.unwrapped, RoboschoolMujocoXmlEnv):
98 | ctrl = TestKeyboardControl()
99 | env.unwrapped.scene.cpp_world.set_key_callback(ctrl.key)
100 | if "camera" in env.unwrapped.__dict__:
101 | env.unwrapped.camera.set_key_callback(ctrl.key)
102 | else:
103 | raise NotImplementedError
104 |
105 | a = np.zeros(env.action_space.shape)
106 | copy_n = min(len(a), len(ctrl.control))
107 | ctrl.human_pause = False
108 |
109 | while 1:
110 | ctrl.human_done = False
111 | sn = env.reset()
112 | frame = 0
113 | reward = 0.0
114 | episode_over = False
115 | while 1:
116 | s = sn
117 | a[:copy_n] = ctrl.control[:copy_n]
118 | # import pdb; pdb.set_trace()
119 | sn, rplus, done, info = env.step(a)
120 | reward += rplus
121 | #env.render("rgb_array")
122 | episode_over |= done
123 | still_visible = True
124 | # import pdb; pdb.set_trace()
125 | while True:
126 | env.render("human")
127 | #env.unwrapped.camera.test_window()
128 | if not ctrl.human_pause: break
129 | if ctrl.human_done: break
130 | if not still_visible: break
131 | frame += 1
132 | if not still_visible: break
133 |
134 | if __name__=="__main__":
135 | env_id = "RoboschoolHumanoid-v0" if len(sys.argv) <= 1 else sys.argv[1]
136 | test(env_id)
137 |
--------------------------------------------------------------------------------
/examples/range_based_navigation_example.py:
--------------------------------------------------------------------------------
1 | import gym
2 |
3 | from gym_extensions.continuous.gym_navigation_2d.env_generator import EnvironmentGenerator, Environment, EnvironmentCollection, Obstacle
4 | import numpy as np
5 | import time
6 |
7 | env = gym.make('Limited-Range-Based-Navigation-2d-Map1-Goal1-v0')
8 |
9 | observation = env.reset()
10 | dt1 = []
11 | dt2 = []
12 |
13 | for t in range(100):
14 | env.render()
15 |
16 | start = time.time()
17 | action = env.action_space.sample()
18 | end = time.time()
19 |
20 | dt1.append(end-start)
21 |
22 | start = time.time()
23 | observation, reward, done, info = env.step(action)
24 | end = time.time()
25 |
26 | dt2.append(end-start)
27 |
28 | if done:
29 | print("Episode finished after {} timesteps".format(t+1))
30 | break
31 |
32 | print( sum(dt1)/len(dt1), sum(dt2)/len(dt2))
33 |
--------------------------------------------------------------------------------
/examples/state_based_navigation_example.py:
--------------------------------------------------------------------------------
1 | import gym
2 |
3 | from gym_extensions.continuous.gym_navigation_2d.env_generator import Environment, EnvironmentCollection, Obstacle
4 | import numpy as np
5 | import time
6 | import timeit
7 |
8 | env = gym.make('State-Based-Navigation-2d-Map0-Goal2-v0')
9 |
10 | observation = env.reset()
11 | dt1 = []
12 | dt2 = []
13 | for t in range(300):
14 | env.render()
15 |
16 | start = time.time()
17 | action = env.action_space.sample()
18 | end = time.time()
19 |
20 | dt1.append(end - start)
21 |
22 | start = time.time()
23 | observation, reward, done, info = env.step(action)
24 | end = time.time()
25 |
26 | dt2.append(end - start)
27 |
28 |
29 | if done:
30 | print("Episode finished after {} timesteps".format(t+1))
31 | break
32 |
33 | print( sum(dt1)/len(dt1), sum(dt2)/len(dt2))
34 |
--------------------------------------------------------------------------------
/gym_extensions/__init__.py:
--------------------------------------------------------------------------------
1 | import sys
2 | if sys.version_info < (2,7):
3 | sys.exit('Sorry, Python < 2.7 is not supported; please install Python 3.5.2 or later')
4 |
--------------------------------------------------------------------------------
/gym_extensions/continuous/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/gym_extensions/continuous/__init__.py
--------------------------------------------------------------------------------
/gym_extensions/continuous/box2d/__init__.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import os
3 |
4 | custom_envs = {
5 | # Contextual environments
6 | "FalconLander-v0":
7 | dict(path='gym_extensions.continuous.box2d.falcon_drone_ship_lander:FalconLander',
8 | max_episode_steps=1000,
9 | kwargs= dict())
10 | }
11 |
12 | def register_custom_envs():
13 | for key, value in custom_envs.items():
14 | arg_dict = dict(id=key,
15 | entry_point=value["path"],
16 | max_episode_steps=value["max_episode_steps"],
17 | kwargs=value["kwargs"])
18 |
19 | if "reward_threshold" in value:
20 | arg_dict["reward_threshold"] = value["reward_threshold"]
21 |
22 | gym.envs.register(**arg_dict)
23 |
24 | register_custom_envs()
25 |
--------------------------------------------------------------------------------
/gym_extensions/continuous/box2d/falcon_drone_ship_lander.py:
--------------------------------------------------------------------------------
1 | import sys, math
2 | import numpy as np
3 | import pyglet
4 |
5 | import Box2D
6 | from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
7 | import pygame
8 | from pygame.locals import K_RIGHT, K_LEFT, KEYDOWN, KEYUP
9 |
10 | import gym
11 | from gym import spaces
12 | from gym.utils import seeding
13 |
14 |
15 | # Inspired by the existing OpenAI Gym LunarLander-v0 environment, as well as by SpaceX's Falcon landing projects!
16 | __author__ = "Victor Barbaros"
17 | __credits__ = ["OpenAi Gym", "Oleg Klimov"]
18 | __version__ = "0.0.1"
19 | __maintainer__ = "Victor Barbaros"
20 | __github_username__ = "vBarbaros"
21 |
22 | FPS = 60
23 | SCALE = 30.0 #default:30 affects how fast-paced the game is, forces should be adjusted as well
24 |
25 | MAIN_ENGINE_POWER = 100.0 # 13.0
26 | SIDE_ENGINE_POWER = 3.0 # 0.6
27 |
28 | INITIAL_RANDOM = 500 # Set 1500 to make game harder
29 |
30 | FALCON_POLY =[
31 | (-4,+150), (-13,135),(-13,0), (-13,-30),
32 | (+13,-30),(+13,0),(+13,+135), (+4,+150)
33 | ]
34 |
35 | LEG_AWAY = 12 #20
36 | LEG_DOWN = 30 # 18
37 | LEG_W, LEG_H = 3, 10
38 | LEG_SPRING_TORQUE = 40
39 |
40 | SIDE_ENGINE_HEIGHT = 130.0 #14
41 | SIDE_ENGINE_AWAY = 15.0 #12
42 |
43 | DRONE_SHIP_H = 0.25
44 | DRONE_SHIP_W = 2.5
45 | SEA_LEVEL = 30/SCALE
46 | GOING_LEFT = False
47 | CONST_FORCE_DRONE_SHIP = 0.75
48 | FREQUENCY_FACTOR = 250
49 |
50 | VIEWPORT_W = 1450
51 | VIEWPORT_H = 820
52 |
53 |
54 | class ContactDetector(contactListener):
55 | def __init__(self, env):
56 | contactListener.__init__(self)
57 | self.env = env
58 | def BeginContact(self, contact):
59 | if self.env.falcon_rocket==contact.fixtureA.body or self.env.falcon_rocket==contact.fixtureB.body:
60 | self.env.game_over = True
61 | for i in range(2):
62 | if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
63 | self.env.legs[i].ground_contact = True
64 | def EndContact(self, contact):
65 | for i in range(2):
66 | if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
67 | self.env.legs[i].ground_contact = False
68 |
69 | class FalconLander(gym.Env):
70 | metadata = {
71 | 'render.modes': ['human', 'rgb_array'],
72 | 'video.frames_per_second' : FPS
73 | }
74 |
75 | continuous = False
76 |
77 | def __init__(self):
78 | pygame.init()
79 | self._seed()
80 | self.viewer = None
81 |
82 | self.world = Box2D.b2World()
83 | self.sea_surface = None
84 | self.falcon_rocket = None
85 | self.floating_drone_ship = None
86 | self.particles = []
87 |
88 | self.prev_reward = None
89 |
90 | high = np.array([np.inf]*8) # useful range is -1 .. +1, but spikes can be higher
91 | self.observation_space = spaces.Box(-high, high)
92 |
93 | if self.continuous:
94 | # Action is two floats [main engine, left-right engines].
95 | # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
96 | # Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
97 | self.action_space = spaces.Box(-1, +1, (2,))
98 | else:
99 | # Nop, fire left engine, main engine, right engine
100 | self.action_space = spaces.Discrete(4)
101 |
102 | self._reset()
103 |
104 |
105 | def _seed(self, seed=None):
106 | self.np_random, seed = seeding.np_random(seed)
107 | return [seed]
108 |
109 |
110 | def _destroy(self):
111 | if not self.sea_surface: return
112 | self.world.contactListener = None
113 | self._clean_particles(True)
114 | self.world.DestroyBody(self.sea_surface)
115 | self.sea_surface = None
116 |
117 | self.world.DestroyBody(self.floating_drone_ship)
118 | self.floating_drone_ship = None
119 |
120 | self.world.DestroyBody(self.falcon_rocket)
121 | self.falcon_rocket = None
122 | self.world.DestroyBody(self.legs[0])
123 | self.world.DestroyBody(self.legs[1])
124 |
125 |
126 | def _reset(self):
127 | self._destroy()
128 | self.world.contactListener_keepref = ContactDetector(self)
129 | self.world.contactListener = self.world.contactListener_keepref
130 | self.game_over = False
131 | self.prev_shaping = None
132 |
133 | W = VIEWPORT_W/SCALE
134 | H = VIEWPORT_H/SCALE
135 |
136 | # create the sea surface into which the falcon might fall (with no collision)
137 | f = fixtureDef(
138 | shape=polygonShape(box=(W,SEA_LEVEL), radius=0.0),
139 | density=0.5,
140 | friction=0.03)
141 |
142 | self.sea_surface = self.world.CreateStaticBody(
143 | position=(0, SEA_LEVEL), angle=0,
144 | fixtures = f)
145 |
146 | # Create the floating drone ship
147 | f = fixtureDef(
148 | shape=polygonShape(box=(DRONE_SHIP_W,DRONE_SHIP_H),
149 | radius=0.0), density=0.4, friction=0.05,
150 | categoryBits=0x0020,
151 | maskBits=0x001) #, userData=self.logo_img)
152 |
153 | self.floating_drone_ship = self.world.CreateDynamicBody(
154 | position=(DRONE_SHIP_W/SCALE, SEA_LEVEL), angle=0, linearDamping = 0.7, angularDamping = 0.3,
155 | fixtures = f)
156 |
157 | self.floating_drone_ship.color1 = (0.1,0.1,0.1)
158 | self.floating_drone_ship.color2 = (0,0,0)
159 |
160 | self.sea_surface.color1 = (0.5,0.4,0.9)
161 | self.sea_surface.color2 = (0.3,0.3,0.5)
162 |
163 |
164 | initial_y = VIEWPORT_H/SCALE
165 | # create the falcon lander
166 | self.falcon_rocket = self.world.CreateDynamicBody(
167 | position = (VIEWPORT_W/SCALE/2, initial_y),
168 | angle=0.0,
169 | fixtures = fixtureDef(
170 | shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in FALCON_POLY ]),
171 | density=5.0, # 5.0
172 | friction=0.1,
173 | categoryBits=0x001,
174 | maskBits=0x0020, # collide only with floating_drone_ship
175 | restitution=0.0) #, userData=self.logo_img) # 0.99 bouncy
176 | )
177 | self.falcon_rocket.color1 = (1.0,1.0,1.0)
178 | self.falcon_rocket.color2 = (0,0,0)
179 |
180 | self.falcon_rocket.ApplyForceToCenter( (
181 | self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
182 | self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
183 | ), True)
184 |
185 | # create the legs of the falcon rocket
186 | self.legs = []
187 | for i in [-1,+1]:
188 | leg = self.world.CreateDynamicBody(
189 | position = (VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
190 | angle = (i*0.05),
191 | fixtures = fixtureDef(
192 | shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)),
193 | density=1.0,
194 | restitution=0.0,
195 | categoryBits=0x001,
196 | maskBits=0x0020)
197 | )
198 | leg.ground_contact = False
199 | leg.color1 = (0,0,0)
200 | leg.color2 = (0,0,0)
201 | rjd = revoluteJointDef(
202 | bodyA=self.falcon_rocket,
203 | bodyB=leg,
204 | localAnchorA=(0, 0),
205 | localAnchorB=(i*LEG_AWAY/SCALE, LEG_DOWN/SCALE),
206 | enableMotor=True,
207 | enableLimit=True,
208 | maxMotorTorque=LEG_SPRING_TORQUE,
209 | motorSpeed=+0.3*i # low enough not to jump back into the sky
210 | )
211 | if i==-1:
212 | rjd.lowerAngle = +0.9 - 0.5 # Yes, the most esoteric numbers here, angles legs have freedom to travel within
213 | rjd.upperAngle = +0.9
214 | else:
215 | rjd.lowerAngle = -0.9
216 | rjd.upperAngle = -0.9 + 0.5
217 | leg.joint = self.world.CreateJoint(rjd)
218 | self.legs.append(leg)
219 |
220 | self.drawlist = [self.falcon_rocket] + self.legs + [self.floating_drone_ship] + [self.sea_surface]
221 |
222 | return self._step(np.array([0,0]) if self.continuous else 0)[0]
223 |
224 |
225 | def _create_particle(self, mass, x, y, ttl):
226 | # create the beautiful particles that bubble off when the force is applied
227 | p = self.world.CreateDynamicBody(
228 | position = (x,y),
229 | angle=0.0,
230 | fixtures = fixtureDef(
231 | shape=circleShape(radius=2/SCALE, pos=(0,0)),
232 | density=mass,
233 | friction=0.1,
234 | categoryBits=0x0100,
235 | maskBits=0x001, # collide only with ground
236 | restitution=0.3)
237 | )
238 | p.ttl = ttl
239 | self.particles.append(p)
240 | self._clean_particles(False)
241 | return p
242 |
243 | def _clean_particles(self, all):
244 | while self.particles and (all or self.particles[0].ttl<0):
245 | self.world.DestroyBody(self.particles.pop(0))
246 |
247 |
248 | def control_floating_platform(self):
249 | '''It's controlled autonomously so the learner has no direct access
250 | '''
251 |
252 | global GOING_LEFT
253 | fx = 1
254 | fy = 0
255 | p1 = self.floating_drone_ship.GetWorldPoint(localPoint=(3.25, 5.5))
256 | p2 = self.floating_drone_ship.GetWorldPoint(localPoint=(3.75, 7.5))
257 | width, fy = p1[0], p1[1]
258 |
259 | new_y_p1 = 2.5*math.cos(FREQUENCY_FACTOR)
260 | new_y_p2 = 2.5*math.sin(FREQUENCY_FACTOR)
261 |
262 | if not GOING_LEFT:
263 | self.floating_drone_ship.ApplyForce(force=(CONST_FORCE_DRONE_SHIP*fx,CONST_FORCE_DRONE_SHIP*new_y_p2), point=p2, wake=True)
264 | self.floating_drone_ship.ApplyForce(force=(CONST_FORCE_DRONE_SHIP/2*fx,CONST_FORCE_DRONE_SHIP/2*new_y_p1), point=p1, wake=True)
265 | if width > (VIEWPORT_W/SCALE*0.7):
266 | GOING_LEFT = True
267 | else:
268 | self.floating_drone_ship.ApplyForce(force=((-1)*CONST_FORCE_DRONE_SHIP*fx,-CONST_FORCE_DRONE_SHIP*new_y_p1), point=p1, wake=True)
269 | self.floating_drone_ship.ApplyForce(force=((-1)*CONST_FORCE_DRONE_SHIP/2*fx,-CONST_FORCE_DRONE_SHIP/2*new_y_p2), point=p2, wake=True)
270 | if width <= (VIEWPORT_W/SCALE*0.3):
271 | GOING_LEFT = False
272 |
273 |
274 | def _step(self, action):
275 | # act on the floating platform, it's autonomous
276 | self.control_floating_platform()
277 |
278 | # Engines
279 | tip = (math.sin(self.falcon_rocket.angle), math.cos(self.falcon_rocket.angle))
280 | side = (-tip[1], tip[0]);
281 | dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
282 |
283 | m_power = 0.0
284 | if (self.continuous and action[0] > 0.0) or (not self.continuous and action==2):
285 | # Main engine
286 | if self.continuous:
287 | m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5 # 0.5..1.0
288 | assert m_power>=0.5 and m_power <= 1.0
289 | else:
290 | m_power = 1.0
291 | orient_x = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1] # 4 is move a bit downwards, +-2 for randomness
292 | orient_y = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
293 | impulse_pos = (self.falcon_rocket.position[0] + orient_x, self.falcon_rocket.position[1] + orient_y)
294 | p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1], m_power) # particles are just a decoration, 3.5 is here to make particle speed adequate
295 | p.ApplyLinearImpulse( ( orient_x*MAIN_ENGINE_POWER*m_power, orient_y*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
296 | self.falcon_rocket.ApplyLinearImpulse( (-orient_x*MAIN_ENGINE_POWER*m_power, -orient_y*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
297 |
298 | s_power = 0.0
299 | if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1,3]):
300 | # Orientation engines
301 | if self.continuous:
302 | direction = np.sign(action[1])
303 | s_power = np.clip(np.abs(action[1]), 0.5,1.0)
304 | assert s_power>=0.5 and s_power <= 1.0
305 | else:
306 | direction = action-2
307 | s_power = 1.0
308 | orient_x = tip[0]*dispersion[0] + side[0]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
309 | orient_y = -tip[1]*dispersion[0] - side[1]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
310 | impulse_pos = (self.falcon_rocket.position[0] + orient_x - tip[0]*17/SCALE, self.falcon_rocket.position[1] + orient_y + tip[1]*SIDE_ENGINE_HEIGHT/SCALE)
311 | p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
312 | p.ApplyLinearImpulse( ( orient_x*SIDE_ENGINE_POWER*s_power, orient_y*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
313 | self.falcon_rocket.ApplyLinearImpulse( (-orient_x*SIDE_ENGINE_POWER*s_power, -orient_y*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
314 |
315 | self.world.Step(1.0/FPS, 6*30, 2*30)
316 |
317 | pos = self.falcon_rocket.position
318 | vel = self.falcon_rocket.linearVelocity
319 |
320 | pos_floating_drone_ship = self.floating_drone_ship.position
321 | vel_floating_drone_ship = self.floating_drone_ship.linearVelocity
322 |
323 | state = [
324 | (pos.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2),
325 | (pos.y - (DRONE_SHIP_H + LEG_DOWN/SCALE)) / (VIEWPORT_H/SCALE/2),
326 | vel.x*(VIEWPORT_W/SCALE/2)/FPS,
327 | vel.y*(VIEWPORT_H/SCALE/2)/FPS,
328 | self.falcon_rocket.angle,
329 | 20.0*self.falcon_rocket.angularVelocity,
330 | 1.0 if (self.legs[0].ground_contact and self.legs[1].ground_contact) else 0.0,
331 | 1.0 if (self.legs[0].ground_contact and self.legs[1].ground_contact) else 0.0,
332 | (pos_floating_drone_ship.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2),
333 | pos_floating_drone_ship.y / (VIEWPORT_H/SCALE/2),
334 | vel_floating_drone_ship.x*(VIEWPORT_W/SCALE/2)/FPS,
335 | vel_floating_drone_ship.y*(VIEWPORT_H/SCALE/2)/FPS,
336 | self.floating_drone_ship.angle
337 | ]
338 |
339 | assert len(state)==13
340 |
341 | reward = 0
342 |
343 | shaping = \
344 | - 100*np.sqrt( (state[0] - state[8])**2 + (state[1] - state[9])**2) \
345 | - 100*np.sqrt(state[2]*state[2] + state[3]*state[3]) \
346 | - 100*abs(state[4]) + 10*state[6] + 10*state[7] \
347 | - 10*abs(state[8]) - 10*abs(state[10]) - 10*abs(state[11])
348 | # in the last line above: lower values of x, dx and angle of drone ship => higher reward
349 |
350 | if self.prev_shaping is not None:
351 | reward = shaping - self.prev_shaping
352 | self.prev_shaping = shaping
353 |
354 | reward -= m_power*0.15 # In the floating ship version, the penalty should be smaller
355 | reward -= s_power*0.05
356 |
357 | done = False
358 |
359 | DRONE_LEVEL = state[9]
360 | if self.game_over or state[1] < DRONE_LEVEL:
361 | done = True
362 | reward = -150
363 |
364 | if not self.falcon_rocket.awake and (state[6] == 1 and state[7] == 1):
365 | done = True
366 | reward = +150
367 |
368 | elif not self.falcon_rocket.awake and (state[6] != 1 and state[7] != 1):#if not landed on both legs, penalize
369 | done = True
370 | reward = -250
371 |
372 | return np.array(state), reward, done, {}
373 |
374 |
375 | def _render(self, mode='human', close=False):
376 | if close:
377 | if self.viewer is not None:
378 | self.viewer.close()
379 | self.viewer = None
380 | return
381 |
382 | from gym.envs.classic_control import rendering
383 | if self.viewer is None:
384 | self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
385 | self.viewer.set_bounds(0, VIEWPORT_W/SCALE, 0, VIEWPORT_H/SCALE)
386 |
387 | for obj in self.particles:
388 | obj.ttl -= 0.15
389 | obj.color1 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
390 | obj.color2 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
391 |
392 | self._clean_particles(False)
393 |
394 | for obj in self.particles + self.drawlist:
395 | for f in obj.fixtures:
396 | trans = f.body.transform
397 | if type(f.shape) is circleShape:
398 | t = rendering.Transform(translation=trans*f.shape.pos)
399 | self.viewer.draw_circle(f.shape.radius, 20, color=obj.color1).add_attr(t)
400 | self.viewer.draw_circle(f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2).add_attr(t)
401 | else:
402 | path = [trans*v for v in f.shape.vertices]
403 | self.viewer.draw_polygon(path, color=obj.color1)
404 | path.append(path[0])
405 | self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
406 |
407 | return self.viewer.render(return_rgb_array = mode=='rgb_array')
408 |
409 |
410 | class FalconLanderContinuous(FalconLander):
411 | continuous = True
412 |
413 | def heuristic(env, s):
414 | # Heuristic for:
415 | # 1. Testing.
416 | # 2. Demonstration rollout.
417 |
418 | #PID to get closer to the floating drone ship
419 | cross_err = math.sqrt( (s[0] - (s[8]))**2 + (s[1] - s[9])**2 )
420 | #print 'cross_error: ', cross_err
421 | angle_targ = (s[0]-s[8])*0.5 + s[2]*0.85 # angle should point towards center (s[0] is horizontal coordinate, s[2] horiz. speed)
422 | if angle_targ > 0.3:
423 | angle_targ = 0.3 # more than 0.3 radians (about 17 degrees) is bad
424 | if angle_targ < -0.3:
425 | angle_targ = -0.3
426 |
427 | # PID controller: s[4] angle, s[5] angularSpeed
428 | angle_todo = (angle_targ - s[4])*0.55 - (s[5])*0.25
429 | #print("angle_targ=%0.2f, angle_todo=%0.2f" % (angle_targ, angle_todo))
430 |
431 | hover_targ = 0.25*(cross_err)
432 | # PID controller: s[1] vertical coordinate s[3] vertical speed
433 | hover_todo = (hover_targ/s[1] - s[1])*0.6 - (s[3])*0.4
434 | #print("hover_targ=%0.2f, hover_todo=%0.2f" % (hover_targ, hover_todo))
435 |
436 | if s[6] or s[7]: # at least one leg has contact
437 | angle_todo = 0
438 | hover_todo = -(s[3])*0.5 # override to reduce fall speed, that's all we need after contact
439 |
440 | if env.continuous:
441 | a = np.array( [hover_todo*30 - 1, -angle_todo*30] )
442 | a = np.clip(a, -1, +1)
443 |
444 | return a
445 |
446 |
447 |
448 | def key_control(env, s):
449 | action_done = False
450 | action = [0, 0]
451 | #print type(env.logo_img)
452 | #print env.logo_img
453 | # Action is two floats [main engine, left-right engines].
454 | # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
455 | # Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
456 | for event in pygame.event.get():
457 | if event.type == pygame.QUIT:
458 | action_done = True
459 | break
460 | elif event.type == KEYDOWN and event.key == K_RIGHT:
461 | action[0] = 0.0
462 | action[1] = 1.0
463 | action_done = True
464 |
465 | elif event.type == KEYDOWN and event.key == K_LEFT:
466 | action[0] = 0.0
467 | action[1] = -1.0
468 | action_done = True
469 |
470 | elif event.type == KEYUP:
471 | action[0] = 1
472 | action[1] = 0
473 | action_done = True
474 |
475 | return np.array(action)
476 |
477 |
478 | if __name__=="__main__":
479 | #env = FalconLander()
480 | env = FalconLanderContinuous()
481 | s = env.reset()
482 | total_reward = 0
483 | steps = 0
484 | while True:
485 | a = heuristic(env, s)
486 | #a = key_control(env, s)
487 | s, r, done, info = env.step(a)
488 | env.render()
489 | total_reward += r
490 | if steps % 20 == 0 or done:
491 | print(["{:+0.2f}".format(x) for x in s])
492 | print("step {} total_reward {:+0.2f}".format(steps, total_reward))
493 | steps += 1
494 | if done: break
495 |
--------------------------------------------------------------------------------
/gym_extensions/continuous/gym_navigation_2d/__init__.py:
--------------------------------------------------------------------------------
1 | from .range_based_navigation import LimitedRangeBasedPOMDPNavigation2DEnv, StateBasedMDPNavigation2DEnv
2 | from .image_based_navigation import ImageBasedNavigation2DEnv
3 | #
4 | from gym.envs import register
5 | import numpy as np
6 |
7 | # (x,y)
8 | idx_to_goal = [ (np.array([230.0, 430.0]), np.array([230.0, 130.0]), np.array([500.0, 430.0]) )] * 10
9 | idx_to_goal[1] = (np.array([130.0, 370.0]), np.array([130.0, 110.0]), np.array([520.0, 250.0]) )
10 | idx_to_goal[2] = (np.array([530.0, 110.0]), np.array([130.0, 310.0]), np.array([460.0, 330.0]) )
11 | idx_to_goal[3] = (np.array([400.0, 50.0]), np.array([180.0, 320.0]), np.array([430.0, 310.0]) )
12 | idx_to_goal[4] = (np.array([180.0, 380.0]), np.array([610.0, 120.0]), np.array([420.0, 330.0]) )
13 | idx_to_goal[5] = (np.array([500.0, 90.0]), np.array([180.0, 390.0]), np.array([380.0, 320.0]) )
14 | idx_to_goal[6] = (np.array([480.0, 150.0]), np.array([440.0, 380.0]), np.array([310.0, 220.0]) )
15 | idx_to_goal[7] = (np.array([500.0, 380.0]), np.array([470.0, 280.0]), np.array([270.0, 280.0]) )
16 | idx_to_goal[8] = (np.array([250.0, 440.0]), np.array([420.0, 200.0]), np.array([150.0, 180.0]) )
17 | idx_to_goal[9] = (np.array([390.0, 110.0]), np.array([520.0, 350.0]), np.array([240.0, 310.0]) )
18 |
19 | custom_envs = {}
20 | for i in range(10):
21 | for j in range(3):
22 | # add each env to dictionary
23 | custom_envs['State-Based-Navigation-2d-Map%d-Goal%d-v0' % (i, j)] = dict(
24 | path='gym_extensions.continuous.gym_navigation_2d:StateBasedMDPNavigation2DEnv',
25 | max_episode_steps=1000,
26 | kwargs=dict(world_idx=i, destination=idx_to_goal[i][j]))
27 |
33 | custom_envs['State-Based-Navigation-2d-Map%d-Goal%d-KnownGoalPosition-v0' % (i, j)] = dict(
34 | path='gym_extensions.continuous.gym_navigation_2d:StateBasedMDPNavigation2DEnv',
35 | max_episode_steps=1000,
36 | kwargs=dict(world_idx=i, destination = idx_to_goal[i][j],
37 | add_self_position_to_observation=True,
38 | add_goal_position_to_observation=True))
39 |
40 | custom_envs['Limited-Range-Based-Navigation-2d-Map%d-Goal%d-v0' % (i, j)] = dict(
41 | path='gym_extensions.continuous.gym_navigation_2d:LimitedRangeBasedPOMDPNavigation2DEnv',
42 | max_episode_steps=1000,
43 | kwargs=dict(world_idx=i, destination = idx_to_goal[i][j]))
44 |
45 | custom_envs['Limited-Range-Based-Navigation-2d-Map%d-Goal%d-KnownPositions-v0' % (i, j)] = dict(
46 | path='gym_extensions.continuous.gym_navigation_2d:LimitedRangeBasedPOMDPNavigation2DEnv',
47 | max_episode_steps=1000,
48 | kwargs=dict(world_idx=i, destination = idx_to_goal[i][j],
49 | add_self_position_to_observation=True,
50 | add_goal_position_to_observation=True))
51 |
52 | custom_envs['Image-Based-Navigation-2d-Map%d-Goal%d-v0' % (i, j)] = dict(
53 | path='gym_extensions.continuous.gym_navigation_2d:ImageBasedNavigation2DEnv',
54 | max_episode_steps=1000,
55 | kwargs=dict(world_idx=i, destination = idx_to_goal[i][j]))
56 |
57 | # register each env into the gym registry
58 | def register_custom_envs():
59 | for key, value in custom_envs.items():
60 | arg_dict = dict(id=key,
61 | entry_point=value['path'],
62 | max_episode_steps=value['max_episode_steps'],
63 | kwargs=value['kwargs'])
64 | if 'reward_threshold' in value.keys():
65 | arg_dict['reward_threshold'] = value['reward_threshold']
66 | register(**arg_dict)
67 |
68 | register_custom_envs()
69 |
--------------------------------------------------------------------------------
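
Note: importing this package runs register_custom_envs() above, so the environment ids can be created through the usual gym factory. A minimal usage sketch (assuming a gym version contemporary with this repo, i.e. the old four-tuple step API):

    import gym
    import gym_extensions.continuous.gym_navigation_2d  # import triggers register_custom_envs()

    env = gym.make('State-Based-Navigation-2d-Map0-Goal0-v0')
    obs = env.reset()
    for _ in range(100):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            break
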
/gym_extensions/continuous/gym_navigation_2d/env_generator.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import scipy
4 | from scipy import stats
5 | import numpy as np
6 | from math import sqrt, asin, cos, sin, atan2
7 | import networkx as nx
8 | from .env_utils import Obstacle, Environment
9 | from .geometry_utils import *
10 | import sys
11 | import pickle
12 |
13 |
14 | class EnvironmentGenerator(object):
15 |
16 | def __init__(self, x_range, y_range, width_range, height_range):
17 | self.x_range = x_range
18 | self.y_range = y_range
19 | self.width_range = width_range
20 | self.height_range = height_range
21 |
22 | def sample_spatial_poisson_process(self, rate):
23 | xmin, xmax = self.x_range
24 | ymin, ymax = self.y_range
25 |
26 | dx = xmax - xmin
27 | dy = ymax - ymin
28 |
29 | N = stats.poisson( rate * dx * dy ).rvs()
30 | x = stats.uniform.rvs(xmin, dx, ((N, 1)) )
31 | y = stats.uniform.rvs(ymin, dy, ((N, 1)) )
32 |
33 | centers = np.hstack((x,y))
34 | return centers
35 |
36 | def sample_axis_aligned_rectangles(self, density):
37 | wmin, wmax = self.width_range
38 | hmin, hmax = self.height_range
39 |
40 | dw = wmax - wmin
41 | dh = hmax - hmin
42 |
43 | centers = self.sample_spatial_poisson_process(rate=density)
44 | widths = stats.uniform.rvs(wmin, dw, ((centers.shape[0], 1)) )
45 | heights = stats.uniform.rvs(hmin, dh, ((centers.shape[0], 1)) )
46 |
47 | # Only keep obstacles that are fully within the allowed range.
48 | # This is so that pixel occupancy queries are the same as
49 | # continuous distance queries, if they are ever used
50 | x_within_bounds = centers[:, 0] + widths[:, 0]/2.0 <= self.x_range[1]
51 | x_within_bounds = x_within_bounds * (centers[:, 0] - widths[:, 0]/2.0 >= self.x_range[0])
52 | 
53 | y_within_bounds = centers[:, 1] + heights[:, 0]/2.0 <= self.y_range[1]
54 | y_within_bounds = y_within_bounds * (centers[:, 1] - heights[:, 0]/2.0 >= self.y_range[0])
55 |
56 | valid_idx = x_within_bounds * y_within_bounds
57 | return centers[valid_idx, :], widths[valid_idx, :], heights[valid_idx, :]
58 |
59 | def merge_rectangles_into_obstacles(self, centers, widths, heights, epsilon):
60 | """Merges rectangles defined by centers, widths, heights. Two rectangles
61 | with distance < epsilon are considered part of the same object."""
62 |
63 | G = nx.Graph()
64 | obstacles = {i: Obstacle(centers[i, :], widths[i, 0], heights[i, 0]) for i in range(len(centers))}
65 | G.add_nodes_from(obstacles.keys())
66 |
67 | for i in obstacles:
68 | for j in obstacles:
69 | if i != j and obstacles[i].distance_to_obstacle(obstacles[j]) < epsilon:
70 | G.add_edge(i,j)
71 |
72 | merged_obstacles = {}
73 | conn_components = nx.connected_components(G)
74 | for cc in conn_components:
75 | cc = list(cc)
76 | new_obs = obstacles[cc[0]]
77 | for i in range(1, len(cc)):
78 | new_obs.merge(obstacles[cc[i]])
79 |
80 | merged_obstacles[cc[0]] = new_obs
81 |
82 | return merged_obstacles
83 |
84 |
85 | class EnvironmentCollection(object):
86 |
87 | def __init__(self):
88 | self.x_range = []
89 | self.y_range = []
90 | self.width_range = []
91 | self.height_range = []
92 | self.num_environments = 0
93 | self.map_collection = {}
94 |
95 | def generate_random(self, x_range, y_range, width_range, height_range, density, num_environments):
96 | self.x_range = x_range
97 | self.y_range = y_range
98 | self.width_range = width_range
99 | self.height_range = height_range
100 | self.num_environments = num_environments
101 | self.map_collection = {}
102 |
103 | eg = EnvironmentGenerator(x_range, y_range, width_range, height_range)
104 | for i in range(self.num_environments):
105 | print('Sampling environment', i)
106 | centers, widths, heights = eg.sample_axis_aligned_rectangles(density)
107 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2)
108 | self.map_collection[i] = Environment(self.x_range, self.y_range, list(obstacles.values()))
109 |
110 | def read(self, pkl_filename):
111 | file_object = open(pkl_filename, 'rb')
112 | self.x_range, self.y_range, worlds_without_classes = pickle.load(file_object, encoding='bytes')
113 | self.map_collection = {idx: Environment(val[0], val[1], [Obstacle(c,w,h) for c,w,h in val[2]]) for idx, val in worlds_without_classes.items()}
114 | file_object.close()
115 |
116 | def save(self, pkl_filename):
117 | file_object = open(pkl_filename, 'wb')
118 | worlds_without_classes = { idx : (world.x_range,
119 | world.y_range,
120 | [(obs.rectangle_centers, obs.rectangle_widths, obs.rectangle_heights) for obs in world.obstacles])
121 |
122 | for idx, world in self.map_collection.items()}
123 |
124 | pickle.dump((self.x_range, self.y_range, worlds_without_classes), file_object)
125 | file_object.close()
126 |
127 |
128 | if __name__ == "__main__":
129 | import argparse
130 | parser = argparse.ArgumentParser()
131 | parser.add_argument("filepath_to_save", help="The pickle filepath (e.g. assets/worlds_640x480_v0.pkl)")
132 | parser.add_argument("--num_environments", default=10, type=int)
133 | args = parser.parse_args()
134 |
135 | x_range=[0, 640]
136 | y_range=[0, 480]
137 | width_range=[10, 30]
138 | height_range=[10,50]
139 |
140 | density = 0.0003
141 |
142 | ec = EnvironmentCollection()
143 | ec.generate_random(x_range, y_range, width_range, height_range, density, args.num_environments)
144 | ec.save(args.filepath_to_save)
145 |
--------------------------------------------------------------------------------
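
For reference, a small sketch of the save/read round trip implemented above (the pickle path is hypothetical; generation parameters mirror the __main__ block):

    from gym_extensions.continuous.gym_navigation_2d.env_generator import EnvironmentCollection

    ec = EnvironmentCollection()
    ec.generate_random(x_range=[0, 640], y_range=[0, 480],
                       width_range=[10, 30], height_range=[10, 50],
                       density=0.0003, num_environments=2)
    ec.save('/tmp/worlds_demo.pkl')   # hypothetical output path

    ec2 = EnvironmentCollection()
    ec2.read('/tmp/worlds_demo.pkl')
    world = ec2.map_collection[0]     # an env_utils.Environment instance
    print(len(world.obstacles), world.x_range, world.y_range)
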
/gym_extensions/continuous/gym_navigation_2d/env_utils.py:
--------------------------------------------------------------------------------
1 | import scipy
2 | from scipy import stats
3 | import numpy as np
4 | from math import sqrt, asin, cos, sin, atan2, ceil
5 | import networkx as nx
6 | import sys
7 | import pickle
8 |
9 | from .geometry_utils import *
10 |
11 | class Obstacle(object):
12 | def __init__(self, c, w, h):
13 |
14 | if type(c) == type([]):
15 | self.rectangle_centers = c
16 | self.rectangle_widths = w
17 | self.rectangle_heights = h
18 |
19 | assert(c)
20 |
21 | self.lowest_point = np.array([ c[0][0], c[0][1] - h[0]/2.0])
22 | self.representative_point = c[0].copy()
23 |
24 | for ca, wa, ha in zip(c, w, h):
25 | if (self.lowest_point[1] > ca[1] - ha/2.0):
26 | self.lowest_point = np.array([ca[0], ca[1] - ha/2.0])
27 |
28 | else:
29 | self.rectangle_centers = [c]
30 | self.rectangle_widths = [w]
31 | self.rectangle_heights = [h]
32 | self.lowest_point = np.array([c[0], c[1] - h/2.0])
33 | self.representative_point = c.copy()
34 |
35 | def __eq__(self, other):
36 | return all([ (sc == oc).all() for sc, oc in zip(self.rectangle_centers, other.rectangle_centers) ]) and \
37 | all([ (sw == ow).all() for sw, ow in zip(self.rectangle_widths, other.rectangle_widths) ]) and \
38 | all([ (sh == oh).all() for sh, oh in zip(self.rectangle_heights, other.rectangle_heights) ])
39 |
40 | def append(self, ca, wa, ha):
41 | self.rectangle_centers.append(ca)
42 | self.rectangle_widths.append(wa)
43 | self.rectangle_heights.append(ha)
44 |
45 | if (self.lowest_point[1] > ca[1] - ha/2.0):
46 | self.lowest_point = np.array([ca[0], ca[1] - ha/2.0])
47 |
48 | def merge(self, obs):
49 | self.rectangle_centers.extend(obs.rectangle_centers)
50 | self.rectangle_widths.extend(obs.rectangle_widths)
51 | self.rectangle_heights.extend(obs.rectangle_heights)
52 |
53 | for ca, wa, ha in zip(obs.rectangle_centers, obs.rectangle_widths, obs.rectangle_heights):
54 | if (self.lowest_point[1] > ca[1] - ha/2.0):
55 | self.lowest_point = np.array([ca[0], ca[1] - ha/2.0])
56 |
57 | def distance_to_point(self, x, y):
58 | p = np.array([x,y])
59 | dist = [point_to_rectangle_distance(p, ca, wa, ha) for ca,wa,ha in zip(self.rectangle_centers, self.rectangle_widths, self.rectangle_heights)]
60 | return min(dist)
61 |
62 | def distance_to_rectangle(self, ca, wa, ha):
63 | dist = [rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb) for cb,wb,hb in zip(self.rectangle_centers, self.rectangle_widths, self.rectangle_heights)]
64 | return min(dist)
65 |
66 | def distance_to_obstacle(self, obs):
67 | dist = [self.distance_to_rectangle(ca, wa, ha) for ca,wa,ha in zip(obs.rectangle_centers, obs.rectangle_widths, obs.rectangle_heights)]
68 | return min(dist)
69 |
70 | def closest_point_to(self, p):
71 | closest_points_to_segments = [closest_point_on_segment(p, s, t) for c,w,h in zip(self.rectangle_centers, self.rectangle_widths, self.rectangle_heights) \
72 | for s,t in rectangle_edges( np.array([c[0] + w/2.0, c[1] + h/2.0]), \
73 | np.array([c[0] + w/2.0, c[1] - h/2.0]), \
74 | np.array([c[0] - w/2.0, c[1] - h/2.0]), \
75 | np.array([c[0] - w/2.0, c[1] + h/2.0]) )]
76 |
77 | distances = [np.linalg.norm(p - cp) for cp in closest_points_to_segments]
78 | idx = np.argmin(distances)
79 |
80 | return closest_points_to_segments[idx]
81 |
82 | class Environment(object):
83 | def __init__(self, x_range, y_range, obstacles):
84 | self.obstacles = obstacles
85 | self.x_range = x_range
86 | self.y_range = y_range
87 |
88 | w = x_range[1] - x_range[0]
89 | h = y_range[1] - y_range[0]
90 |
91 | self.image = 255*np.ones((h, w, 3), dtype='uint8')
92 | for obs in self.obstacles:
93 | for co, wo, ho in zip(obs.rectangle_centers, obs.rectangle_widths, obs.rectangle_heights):
94 | r = co[1]
95 | c = co[0]
96 | min_row = max(int(r - ho/2.0), 0)
97 | max_row = min(int(r + ho/2.0), h-1)
98 |
99 | min_col = max(int(c - wo/2.0), 0)
100 | max_col = min(int(c + wo/2.0), w-1)
101 |
102 | self.image[min_row:max_row, min_col:max_col, :] = (204, 153, 102)
103 |
104 |
105 | def __eq__(self, other):
106 | return self.obstacles == other.obstacles and self.x_range == other.x_range and self.y_range == other.y_range
107 |
108 | def point_distance_from_obstacles(self, x, y):
109 | dist = [obs.distance_to_point(x, y) for obs in self.obstacles]
110 | return min(dist)
111 |
112 |
113 | def point_is_in_free_space(self, x, y, epsilon=0.25):
114 | row = int(y)
115 | col = int(x)
116 |
117 | if (row >=0 and row < self.image.shape[0] and col >= 0 and col < self.image.shape[1]):
118 | return (self.image[row, col, :] == (255, 255, 255)).all()
119 | else:
120 | return True
121 |
122 |
123 | def range_and_bearing_to_closest_obstacle(self, x,y):
124 | dist = [(self.obstacles[i].distance_to_point(x, y), i) for i in range(len(self.obstacles))]
125 | distance_to_closest_obstacle, idx_closest = min(dist)
126 | closest_obstacle = self.obstacles[idx_closest]
127 | cp = closest_obstacle.closest_point_to(np.array([x,y]))
128 | bearing_to_closest_obstacle = atan2(cp[1]-y, cp[0]-x)
129 | return distance_to_closest_obstacle, bearing_to_closest_obstacle
130 |
131 |
132 | def segment_is_in_free_space(self, x1,y1, x2,y2, epsilon=0.5):
133 | # Note: this is assuming that 1px = 1m
134 | a = np.array([x1,y1])
135 | b = np.array([x2,y2])
136 | L = np.linalg.norm(b-a)
137 | return all([self.point_is_in_free_space(a[0] + i/L*(b[0]-a[0]), a[1] + i/L*(b[1]-a[1])) for i in range(ceil(L))])
138 |
139 |
140 | def segment_distance_from_obstacles(self, x1, y1, x2, y2):
141 |
142 | if not self.segment_is_in_free_space(x1, y1, x2, y2, epsilon=1e-10):
143 | return 0.0
144 |
145 | a = np.array([x1, y1])
146 | b = np.array([x2, y2])
147 |
148 | dist = [point_to_segment_distance(p, a, b) for obs in self.obstacles \
149 | for c,w,h in zip(obs.rectangle_centers, obs.rectangle_widths, obs.rectangle_heights) for p in rectangle_vertices(c,w,h)]
150 |
151 | return min(dist)
152 |
153 | def raytrace(self, p, theta, max_range, n_evals=50):
154 | """TODO: implement a less naive collision algo than this"""
155 | ct = cos(theta)
156 | st = sin(theta)
157 | direction = np.array([ct, st])
158 |
159 | a = p
160 | b = p + max_range * direction
161 |
162 | if self.segment_is_in_free_space(a[0], a[1], b[0], b[1], epsilon=1e-10):
163 | return -1.0
164 |
165 | last_free_dist = 0
166 | for e in range(n_evals):
167 | dist = e/float(n_evals) * max_range
168 | c = a + dist * direction
169 | if not self.point_is_in_free_space(c[0], c[1], epsilon=1e-10):
170 | return last_free_dist
171 |
172 | last_free_dist = dist
173 | return max_range
174 |
175 | def winding_angle(self, path, point):
176 | wa = 0
177 | for i in range(len(path)-1):
178 | p = np.array([path[i].x, path[i].y])
179 | pn = np.array([path[i+1].x, path[i+1].y])
180 |
181 | vp = p - point
182 | vpn = pn - point
183 |
184 | vp_norm = sqrt(vp[0]**2 + vp[1]**2)
185 | vpn_norm = sqrt(vpn[0]**2 + vpn[1]**2)
186 |
187 | assert (vp_norm > 0)
188 | assert (vpn_norm > 0)
189 |
190 | z = np.cross(vp, vpn)/(vp_norm * vpn_norm)
191 | z = min(max(z, -1.0), 1.0)
192 | wa += asin(z)
193 |
194 | return wa
195 |
196 | def homology_vector(self, path):
197 | L = len(self.obstacles)
198 | h = np.zeros((L, 1) )
199 | for i in range(L):
200 | h[i, 0] = self.winding_angle(path, self.obstacles[i].representative_point)
201 |
202 | return h.reshape((L,))
203 |
--------------------------------------------------------------------------------
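
A short sketch of the geometric queries Environment exposes, using one hand-built obstacle (values are illustrative):

    import numpy as np
    from gym_extensions.continuous.gym_navigation_2d.env_utils import Obstacle, Environment

    # a single 20x20 block centred at (50, 50) inside a 100x100 world
    block = Obstacle(np.array([50.0, 50.0]), 20.0, 20.0)
    world = Environment([0, 100], [0, 100], [block])

    print(world.point_is_in_free_space(10, 10))                # True, far from the block
    print(world.point_distance_from_obstacles(10, 50))         # 30.0, distance to the block's left edge
    print(world.range_and_bearing_to_closest_obstacle(10, 50))
    print(world.raytrace(np.array([10.0, 50.0]), 0.0, 100.0))  # approximate range until the beam hits the block
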
/gym_extensions/continuous/gym_navigation_2d/geometry_utils.py:
--------------------------------------------------------------------------------
1 | import scipy
2 | from scipy import stats
3 | import numpy as np
4 | from math import sqrt
5 | from itertools import product
6 |
7 | def closest_point_on_segment(p, a, b):
8 | lsqr = (a[0]-b[0])**2 + (a[1]-b[1])**2
9 | pa = p-a
10 | ba = b-a
11 |
12 | if (lsqr < 1e-15):
13 | return a
14 |
15 | t_opt = pa.dot(ba) / lsqr
16 | t_opt = max(min(t_opt, 1), 0)
17 | cp = a + t_opt * (b-a)
18 | return cp
19 |
20 | def point_to_segment_distance(p, a, b):
21 | cp = closest_point_on_segment(p, a, b)
22 | return np.linalg.norm(cp - p)
23 |
24 | def point_to_rectangle_distance(p, ca, wa, ha):
25 |
26 | if (abs(p-ca) < np.array([wa/2.0, ha/2.0])).all():
27 | return 0.
28 |
29 | a1 = ca + np.array([wa/2.0, ha/2.0])
30 | a2 = ca + np.array([wa/2.0, -ha/2.0])
31 | a3 = ca + np.array([-wa/2.0, -ha/2.0])
32 | a4 = ca + np.array([-wa/2.0, ha/2.0])
33 |
34 | d1 = point_to_segment_distance(p, a1, a2)
35 | d2 = point_to_segment_distance(p, a2, a3)
36 | d3 = point_to_segment_distance(p, a3, a4)
37 | d4 = point_to_segment_distance(p, a4, a1)
38 | return min([d1, d2, d3, d4])
39 |
40 | def point_is_on_left(a, b, c):
41 | """Returns true iff c is on the left of the infinite line ab"""
42 | return (a[0] - c[0]) * (b[1] - c[1]) > (a[1] - c[1]) * (b[0] - c[0])
43 |
44 | def segments_intersect(a, b, c, d):
45 | """Returns true iff the line segments ab and cd intersect"""
46 | epsilon = 1e-10
47 |
48 | if point_to_segment_distance(a, c, d) < epsilon or point_to_segment_distance(b, c, d) < epsilon:
49 | return True
50 |
51 | if point_to_segment_distance(c, a, b) < epsilon or point_to_segment_distance(d, a, b) < epsilon:
52 | return True
53 |
54 | return point_is_on_left(a,b,c) != point_is_on_left(a,b,d) and point_is_on_left(c,d,a) != point_is_on_left(c,d,b)
55 |
56 | def rectangle_edges(z1,z2,z3,z4):
57 | yield (z1, z2)
58 | yield (z2, z3)
59 | yield (z3, z4)
60 | yield (z4, z1)
61 |
62 | def rectangle_vertices(c,w,h):
63 | yield c + np.array([w/2.0, h/2.0])
64 | yield c + np.array([w/2.0, -h/2.0])
65 | yield c + np.array([-w/2.0, -h/2.0])
66 | yield c + np.array([-w/2.0, h/2.0])
67 |
68 |
69 | def rectangles_intersect(ca,wa,ha, q1,q2,q3,q4):
70 | a1 = ca + np.array([wa/2.0, ha/2.0])
71 | a2 = ca + np.array([wa/2.0, -ha/2.0])
72 | a3 = ca + np.array([-wa/2.0, -ha/2.0])
73 | a4 = ca + np.array([-wa/2.0, ha/2.0])
74 |
75 | cq = (q1 + q2 + q3 + q4)/4.0
76 | cq_is_in_rectangle = abs(cq[0] - ca[0]) < wa/2.0 and abs(cq[1] - ca[1]) < ha/2.0
77 |
78 | segment_intersections = [segments_intersect(e1[0], e1[1], e2[0], e2[1]) for e1, e2 in product(rectangle_edges(a1,a2,a3,a4), rectangle_edges(q1,q2,q3,q4))]
79 | return any(segment_intersections) or cq_is_in_rectangle
80 |
81 |
82 | def rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb):
83 | a1 = ca + np.array([wa/2.0, ha/2.0])
84 | a2 = ca + np.array([wa/2.0, -ha/2.0])
85 | a3 = ca + np.array([-wa/2.0, -ha/2.0])
86 | a4 = ca + np.array([-wa/2.0, ha/2.0])
87 |
88 | b1 = cb + np.array([wb/2.0, hb/2.0])
89 | b2 = cb + np.array([wb/2.0, -hb/2.0])
90 | b3 = cb + np.array([-wb/2.0, -hb/2.0])
91 | b4 = cb + np.array([-wb/2.0, hb/2.0])
92 |
93 | for e1, e2 in product(rectangle_edges(a1,a2,a3,a4), rectangle_edges(b1,b2,b3,b4)):
94 | if segments_intersect(e1[0], e1[1], e2[0], e2[1]):
95 | return 0.0
96 |
97 | da1 = point_to_rectangle_distance(a1, cb, wb, hb)
98 | da2 = point_to_rectangle_distance(a2, cb, wb, hb)
99 | da3 = point_to_rectangle_distance(a3, cb, wb, hb)
100 | da4 = point_to_rectangle_distance(a4, cb, wb, hb)
101 |
102 | db1 = point_to_rectangle_distance(b1, ca, wa, ha)
103 | db2 = point_to_rectangle_distance(b2, ca, wa, ha)
104 | db3 = point_to_rectangle_distance(b3, ca, wa, ha)
105 | db4 = point_to_rectangle_distance(b4, ca, wa, ha)
106 | return min([da1, da2, da3, da4, db1, db2, db3, db4])
107 |
--------------------------------------------------------------------------------
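
A few quick checks of the helpers above (expected values follow the unit tests later in this package):

    import numpy as np
    from gym_extensions.continuous.gym_navigation_2d.geometry_utils import (
        point_to_segment_distance, point_to_rectangle_distance, segments_intersect)

    p = np.array([0.0, 0.0])
    print(point_to_segment_distance(p, np.array([1.0, 1.0]), np.array([2.0, 2.0])))  # sqrt(2)
    print(point_to_rectangle_distance(p, np.array([1.0, 1.0]), 1.0, 1.0))            # sqrt(0.5)
    print(segments_intersect(np.array([0.0, 0.0]), np.array([2.0, 2.0]),
                             np.array([0.0, 2.0]), np.array([2.0, 0.0])))            # True
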
/gym_extensions/continuous/gym_navigation_2d/gym_rendering.py:
--------------------------------------------------------------------------------
1 | """
2 | 2D rendering framework
3 | """
4 | from __future__ import division
5 | import os
6 | import six
7 | import sys
8 |
9 | if "Apple" in sys.version:
10 | if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
11 | os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
12 | # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
13 |
14 | from gym.utils import reraise
15 | from gym import error
16 |
17 | try:
18 | import pyglet
19 | except ImportError as e:
20 | reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.")
21 |
22 | try:
23 | from pyglet import gl
24 | except ImportError as e:
25 |     reraise(prefix="Error occurred while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL installed. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python <your_script.py>'")
26 |
27 | import math
28 | import numpy as np
29 |
30 | RAD2DEG = 57.29577951308232
31 |
32 | def get_display(spec):
33 | """Convert a display specification (such as :0) into an actual Display
34 | object.
35 |
36 | Pyglet only supports multiple Displays on Linux.
37 | """
38 | if spec is None:
39 | return None
40 | elif isinstance(spec, six.string_types):
41 | return pyglet.canvas.Display(spec)
42 | else:
43 | raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
44 |
45 | class Viewer(object):
46 | def __init__(self, width, height, display=None):
47 | display = get_display(display)
48 |
49 | self.width = width
50 | self.height = height
51 | self.window = pyglet.window.Window(width=width, height=height, display=display)
52 | self.window.on_close = self.window_closed_by_user
53 | self.geoms = []
54 | self.onetime_geoms = []
55 | self.transform = Transform()
56 |
57 | gl.glEnable(gl.GL_BLEND)
58 | gl.glBlendFunc(gl.GL_SRC_ALPHA, gl.GL_ONE_MINUS_SRC_ALPHA)
59 |
60 | def close(self):
61 | self.window.close()
62 |
63 | def window_closed_by_user(self):
64 | self.close()
65 |
66 | def set_bounds(self, left, right, bottom, top):
67 | assert right > left and top > bottom
68 | scalex = self.width/(right-left)
69 | scaley = self.height/(top-bottom)
70 | self.transform = Transform(
71 | translation=(-left*scalex, -bottom*scaley),
72 | scale=(scalex, scaley))
73 |
74 | def add_geom(self, geom):
75 | self.geoms.append(geom)
76 |
77 | def add_onetime(self, geom):
78 | self.onetime_geoms.append(geom)
79 |
80 | def render(self, return_rgb_array=False):
81 | gl.glClearColor(1,1,1,1)
82 | self.window.clear()
83 | self.window.switch_to()
84 | self.window.dispatch_events()
85 | self.transform.enable()
86 | for geom in self.geoms:
87 | geom.render()
88 | for geom in self.onetime_geoms:
89 | geom.render()
90 | self.transform.disable()
91 | arr = None
92 | if return_rgb_array:
93 | buffer = pyglet.image.get_buffer_manager().get_color_buffer()
94 | image_data = buffer.get_image_data()
95 | arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
96 | # In https://github.com/openai/gym-http-api/issues/2, we
97 | # discovered that someone using Xmonad on Arch was having
98 | # a window of size 598 x 398, though a 600 x 400 window
99 | # was requested. (Guess Xmonad was preserving a pixel for
100 | # the boundary.) So we use the buffer height/width rather
101 | # than the requested one.
102 | arr = arr.reshape(buffer.height, buffer.width, 4)
103 | arr = arr[::-1,:,0:3]
104 | self.window.flip()
105 | self.onetime_geoms = []
106 | return arr
107 |
108 | # Convenience
109 | def draw_circle(self, radius=10, res=30, filled=True, **attrs):
110 | geom = make_circle(radius=radius, res=res, filled=filled)
111 | _add_attrs(geom, attrs)
112 | self.add_onetime(geom)
113 | return geom
114 |
115 | def draw_polygon(self, v, filled=True, **attrs):
116 | geom = make_polygon(v=v, filled=filled)
117 | _add_attrs(geom, attrs)
118 | self.add_onetime(geom)
119 | return geom
120 |
121 | def draw_polyline(self, v, **attrs):
122 | geom = make_polyline(v=v)
123 | _add_attrs(geom, attrs)
124 | self.add_onetime(geom)
125 | return geom
126 |
127 | def draw_line(self, start, end, **attrs):
128 | geom = Line(start, end)
129 | _add_attrs(geom, attrs)
130 | self.add_onetime(geom)
131 | return geom
132 |
133 | def get_array(self):
134 | self.window.flip()
135 | image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
136 | self.window.flip()
137 | arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
138 | arr = arr.reshape(self.height, self.width, 4)
139 | return arr[::-1,:,0:3]
140 |
141 | def _add_attrs(geom, attrs):
142 | if "color" in attrs:
143 | geom.set_color(*attrs["color"])
144 | if "linewidth" in attrs:
145 | geom.set_linewidth(attrs["linewidth"])
146 |
147 | class Geom(object):
148 | def __init__(self):
149 | self._color=Color((0, 0, 0, 1.0))
150 | self.attrs = [self._color]
151 | def render(self):
152 | for attr in reversed(self.attrs):
153 | attr.enable()
154 | self.render1()
155 | for attr in self.attrs:
156 | attr.disable()
157 | def render1(self):
158 | raise NotImplementedError
159 | def add_attr(self, attr):
160 | self.attrs.append(attr)
161 | def set_color(self, r, g, b):
162 | self._color.vec4 = (r, g, b, 1)
163 |
164 | class Attr(object):
165 | def enable(self):
166 | raise NotImplementedError
167 | def disable(self):
168 | pass
169 |
170 | class Transform(Attr):
171 | def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
172 | self.set_translation(*translation)
173 | self.set_rotation(rotation)
174 | self.set_scale(*scale)
175 | def enable(self):
176 | gl.glPushMatrix()
177 | gl.glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL location point
178 | gl.glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
179 | gl.glScalef(self.scale[0], self.scale[1], 1)
180 | def disable(self):
181 | gl.glPopMatrix()
182 | def set_translation(self, newx, newy):
183 | self.translation = (float(newx), float(newy))
184 | def set_rotation(self, new):
185 | self.rotation = float(new)
186 | def set_scale(self, newx, newy):
187 | self.scale = (float(newx), float(newy))
188 |
189 | class Color(Attr):
190 | def __init__(self, vec4):
191 | self.vec4 = vec4
192 | def enable(self):
193 | gl.glColor4f(*self.vec4)
194 |
195 | class LineStyle(Attr):
196 | def __init__(self, style):
197 | self.style = style
198 | def enable(self):
199 | gl.glEnable(gl.GL_LINE_STIPPLE)
200 | gl.glLineStipple(1, self.style)
201 | def disable(self):
202 | gl.glDisable(gl.GL_LINE_STIPPLE)
203 |
204 | class LineWidth(Attr):
205 | def __init__(self, stroke):
206 | self.stroke = stroke
207 | def enable(self):
208 | gl.glLineWidth(self.stroke)
209 |
210 | class Point(Geom):
211 | def __init__(self):
212 | Geom.__init__(self)
213 | def render1(self):
214 | gl.glBegin(gl.GL_POINTS) # draw point
215 | gl.glVertex3f(0.0, 0.0, 0.0)
216 | gl.glEnd()
217 |
218 | class FilledPolygon(Geom):
219 | def __init__(self, v):
220 | Geom.__init__(self)
221 | self.v = v
222 | def render1(self):
223 | if len(self.v) == 4 : gl.glBegin(gl.GL_QUADS)
224 | elif len(self.v) > 4 : gl.glBegin(gl.GL_POLYGON)
225 | else: gl.glBegin(gl.GL_TRIANGLES)
226 | for p in self.v:
227 | gl.glVertex3f(p[0], p[1],0) # draw each vertex
228 | gl.glEnd()
229 |
230 | def make_circle(radius=10, res=30, filled=True):
231 | points = []
232 | for i in range(res):
233 | ang = 2*math.pi*i / res
234 | points.append((math.cos(ang)*radius, math.sin(ang)*radius))
235 | if filled:
236 | return FilledPolygon(points)
237 | else:
238 | return PolyLine(points, True)
239 |
240 | def make_polygon(v, filled=True):
241 | if filled: return FilledPolygon(v)
242 | else: return PolyLine(v, True)
243 |
244 | def make_polyline(v):
245 | return PolyLine(v, False)
246 |
247 | def make_capsule(length, width):
248 | l, r, t, b = 0, length, width/2, -width/2
249 | box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
250 | circ0 = make_circle(width/2)
251 | circ1 = make_circle(width/2)
252 | circ1.add_attr(Transform(translation=(length, 0)))
253 | geom = Compound([box, circ0, circ1])
254 | return geom
255 |
256 | class Compound(Geom):
257 | def __init__(self, gs):
258 | Geom.__init__(self)
259 | self.gs = gs
260 | for g in self.gs:
261 | g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
262 | def render1(self):
263 | for g in self.gs:
264 | g.render()
265 |
266 | class PolyLine(Geom):
267 | def __init__(self, v, close):
268 | Geom.__init__(self)
269 | self.v = v
270 | self.close = close
271 | self.linewidth = LineWidth(1)
272 | self.add_attr(self.linewidth)
273 | def render1(self):
274 | gl.glBegin(gl.GL_LINE_LOOP if self.close else gl.GL_LINE_STRIP)
275 | for p in self.v:
276 | gl.glVertex3f(p[0], p[1],0) # draw each vertex
277 | gl.glEnd()
278 | def set_linewidth(self, x):
279 | self.linewidth.stroke = x
280 |
281 | class Line(Geom):
282 | def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
283 | Geom.__init__(self)
284 | self.start = start
285 | self.end = end
286 | self.linewidth = LineWidth(1)
287 | self.add_attr(self.linewidth)
288 |
289 | def render1(self):
290 | gl.glBegin(gl.GL_LINES)
291 | gl.glVertex2f(*self.start)
292 | gl.glVertex2f(*self.end)
293 | gl.glEnd()
294 |
295 | class Image(Geom):
296 | def __init__(self, fname, width, height):
297 | Geom.__init__(self)
298 | self.width = width
299 | self.height = height
300 | img = pyglet.image.load(fname)
301 | self.img = img
302 | self.flip = False
303 | def render1(self):
304 | self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
305 |
306 | # ================================================================
307 |
308 | class SimpleImageViewer(object):
309 | def __init__(self, display=None):
310 | self.window = None
311 | self.isopen = False
312 | self.display = display
313 | def imshow(self, arr):
314 | if self.window is None:
315 | height, width, channels = arr.shape
316 | self.window = pyglet.window.Window(width=width, height=height, display=self.display)
317 | self.width = width
318 | self.height = height
319 | self.isopen = True
320 | assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong shape"
321 | image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3)
322 | self.window.clear()
323 | self.window.switch_to()
324 | self.window.dispatch_events()
325 | image.blit(0,0)
326 | self.window.flip()
327 | def close(self):
328 | if self.isopen:
329 | self.window.close()
330 | self.isopen = False
331 | def __del__(self):
332 | self.close()
333 |
--------------------------------------------------------------------------------
/gym_extensions/continuous/gym_navigation_2d/image_based_navigation.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import error, spaces, utils
3 | from gym.utils import seeding
4 | from gym.spaces import Box, Tuple
5 |
6 | from math import pi, cos, sin
7 | import numpy as np
8 | import logging
9 | from skimage.draw import circle
10 | from skimage.transform import resize
11 | from .range_based_navigation import StateBasedMDPNavigation2DEnv
12 |
13 | class ImageBasedNavigation2DEnv(StateBasedMDPNavigation2DEnv):
14 | logger = logging.getLogger(__name__)
15 | metadata = {'render.modes': ['human']}
16 |
17 | def __init__(self, *args, **kwargs):
18 | StateBasedMDPNavigation2DEnv.__init__(self, *args, **kwargs)
19 | self.obs_img_shape = (160, 120, 3)
20 |
21 | self.observation_space = Box(0., 255., (self.obs_img_shape[1], self.obs_img_shape[0], 3))
22 |
23 | def set_circular_observation(self, img, col_center, row_center, radius, color=(0,0,0)):
24 | rr,cc = circle(row_center, col_center, radius)
25 | # make sure within bounds of img
26 | rr[rr<0] = 0
27 | rr[rr>img.shape[0]-1] = img.shape[0]-1
28 | cc[cc<0] = 0
29 | cc[cc>img.shape[1]-1] = img.shape[1]-1
30 | img[rr,cc] = color
31 | return img
32 |
33 | def _get_observation(self, state):
34 | image = self.world.image.copy()
35 |
36 | state_col = int(self.state[0])
37 | state_row = int(self.state[1])
38 |
39 | dest_col = int(self.destination[0])
40 | dest_row = int(self.destination[1])
41 | dest_rad = int(self.destination_tolerance_range)
42 | image = self.set_circular_observation(image, state_col, state_row, 5, (0,0,0))
43 | image = self.set_circular_observation(image, dest_col, dest_row, dest_rad, (255,0,0))
44 | image = image[::-1,:,:]
45 | image = resize(image, self.obs_img_shape[0:2][::-1])
46 |
47 | return image
48 |
--------------------------------------------------------------------------------
/gym_extensions/continuous/gym_navigation_2d/range_based_navigation.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import error, spaces, utils
3 | from gym.utils import seeding
4 | from gym.spaces import Box, Tuple
5 | from .env_generator import EnvironmentCollection
6 |
7 | from math import pi, cos, sin
8 | import numpy as np
9 |
10 | #from gym.envs.classic_control.rendering import make_circle, Transform
11 | from gym_extensions.continuous.gym_navigation_2d import gym_rendering
12 | import os
13 | import logging
14 |
15 | class LimitedRangeBasedPOMDPNavigation2DEnv(gym.Env):
16 | logger = logging.getLogger(__name__)
17 | metadata = {'render.modes': ['human']}
18 |
19 | def __init__(self,
20 | worlds_pickle_filename=os.path.join(os.path.dirname(__file__), "assets", "worlds_640x480_v0.pkl"),
21 | world_idx=0,
22 | initial_position = np.array([-20.0, -20.0]),
23 | destination = np.array([520.0, 400.0]),
24 | max_observation_range = 100.0,
25 | destination_tolerance_range=20.0,
26 | add_self_position_to_observation=False,
27 | add_goal_position_to_observation=False):
28 |
29 | worlds = EnvironmentCollection()
30 | worlds.read(worlds_pickle_filename)
31 |
32 | self.world = worlds.map_collection[world_idx]
33 | self.set_destination(destination)
34 |
35 | assert not (self.destination is None)
36 | self.init_position = initial_position
37 | self.state = self.init_position.copy()
38 |
39 |
40 | self.max_observation_range = max_observation_range
41 | self.destination_tolerance_range = destination_tolerance_range
42 | self.viewer = None
43 | self.num_beams = 16
44 | self.max_speed = 5
45 | self.add_self_position_to_observation = add_self_position_to_observation
46 | self.add_goal_position_to_observation = add_goal_position_to_observation
47 |
48 |
49 | low = np.array([0.0, 0.0])
50 | high = np.array([self.max_speed, 2*pi])
51 | self.action_space = Box(low, high)#Tuple( (Box(0.0, self.max_speed, (1,)), Box(0.0, 2*pi, (1,))) )
52 | low = [-1.0] * self.num_beams
53 | high = [self.max_observation_range] * self.num_beams
54 | if add_self_position_to_observation:
55 | low.extend([-10000., -10000.]) # x and y coords
56 | high.extend([10000., 10000.])
57 | if add_goal_position_to_observation:
58 | low.extend([-10000., -10000.]) # x and y coords
59 | high.extend([10000., 10000.])
60 |
61 |
62 | self.observation_space = Box(np.array(low), np.array(high))
63 | self.observation = []
64 |
65 | def set_destination(self, destination):
66 | self.destination = destination
67 |
68 | def _get_observation(self, state):
69 | delta_angle = 2*pi/self.num_beams
70 | ranges = [self.world.raytrace(self.state,
71 | i * delta_angle,
72 | self.max_observation_range) for i in range(self.num_beams)]
73 |
74 | ranges = np.array(ranges)
75 | if self.add_self_position_to_observation:
76 | ranges = np.concatenate([ranges, self.state])
77 | if self.add_goal_position_to_observation:
78 | ranges = np.concatenate([ranges, self.destination])
79 | return ranges
80 |
81 | def _step(self, action):
82 | old_state = self.state.copy()
83 | v = action[0]
84 | theta = action[1]
85 | dx = v*cos(theta)
86 | dy = v*sin(theta)
87 |
88 | self.state += np.array([dx, dy])
89 |
90 | reward = -1 # minus 1 for every timestep you're not in the goal
91 | done = False
92 | info = {}
93 |
94 | if np.linalg.norm(self.destination - self.state) < self.destination_tolerance_range:
95 | reward = 20 # for reaching the goal
96 | done = True
97 |
98 | if not self.world.point_is_in_free_space(self.state[0], self.state[1], epsilon=0.25):
99 | reward = -5 # for hitting an obstacle
100 |
101 | if not self.world.segment_is_in_free_space(old_state[0], old_state[1],
102 | self.state[0], self.state[1],
103 | epsilon=0.25):
104 | reward = -5 # for hitting an obstacle
105 |
106 | self.observation = self._get_observation(self.state)
107 | return self.observation, reward, done, info
108 |
109 |
110 | def _reset(self):
111 | self.state = self.init_position.copy()
112 | return self._get_observation(self.state)
113 |
114 | def _plot_state(self, viewer, state):
115 | polygon = gym_rendering.make_circle(radius=5, res=30, filled=True)
116 | state_tr = gym_rendering.Transform(translation=(state[0], state[1]))
117 | polygon.add_attr(state_tr)
118 | viewer.add_onetime(polygon)
119 |
120 |
121 | def _plot_observation(self, viewer, state, observation):
122 | delta_angle = 2*pi/self.num_beams
123 | for i in range(len(observation)):
124 | r = observation[i]
125 | if r < 0:
126 | r = self.max_observation_range
127 |
128 | theta = i*delta_angle
129 | start = (state[0], state[1])
130 | end = (state[0] + r*cos(theta), state[1] + r*sin(theta))
131 |
132 | line = gym_rendering.Line(start=start, end=end)
133 | line.set_color(.5, 0.5, 0.5)
134 | viewer.add_onetime(line)
135 |
136 | def _append_elements_to_viewer(self, viewer,
137 | screen_width,
138 | screen_height,
139 | obstacles,
140 | destination=None,
141 | destination_tolerance_range=None):
142 |
143 | viewer.set_bounds(left=-100, right=screen_width+100, bottom=-100, top=screen_height+100)
144 |
145 | L = len(obstacles)
146 | for i in range(L):
147 |
148 | obs = obstacles[i]
149 | for c,w,h in zip(obs.rectangle_centers, obs.rectangle_widths, obs.rectangle_heights):
150 | l = -w/2.0
151 | r = w/2.0
152 | t = h/2.0
153 | b = -h/2.0
154 |
155 | rectangle = gym_rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
156 | tr = gym_rendering.Transform(translation=(c[0], c[1]))
157 | rectangle.add_attr(tr)
158 | rectangle.set_color(.8,.6,.4)
159 | viewer.add_geom(rectangle)
160 |
161 |
162 | if not (destination is None):
163 | tr = gym_rendering.Transform(translation=(destination[0], destination[1]))
164 | polygon = gym_rendering.make_circle(radius=destination_tolerance_range, res=30, filled=True)
165 | polygon.add_attr(tr)
166 | polygon.set_color(1.0, 0., 0.)
167 | viewer.add_geom(polygon)
168 |
169 | def _render(self, mode='human', close=False):
170 |
171 | if close:
172 | if self.viewer is not None:
173 | self.viewer.close()
174 | self.viewer = None
175 | return
176 |
177 | screen_width = (self.world.x_range[1] - self.world.x_range[0])
178 | screen_height = (self.world.y_range[1] - self.world.y_range[0])
179 |
180 | if self.viewer is None:
181 | self.viewer = gym_rendering.Viewer(screen_width, screen_height)
182 | self._append_elements_to_viewer(self.viewer,
183 | screen_width,
184 | screen_height,
185 | obstacles=self.world.obstacles,
186 | destination=self.destination,
187 | destination_tolerance_range=self.destination_tolerance_range)
188 |
189 | self._plot_state(self.viewer, self.state)
190 | self._plot_observation(self.viewer, self.state, self.observation)
191 |
192 | return self.viewer.render(return_rgb_array = mode=='rgb_array')
193 |
194 | class StateBasedMDPNavigation2DEnv(LimitedRangeBasedPOMDPNavigation2DEnv):
195 | logger = logging.getLogger(__name__)
196 | def __init__(self, *args, **kwargs):
197 | LimitedRangeBasedPOMDPNavigation2DEnv.__init__(self, *args, **kwargs)
198 | low = [-float('inf'), -float('inf'), 0.0, 0.0]
199 | high = [float('inf'), float('inf'), float('inf'), 2*pi]
200 |
201 | if self.add_goal_position_to_observation:
202 | low.extend([-10000., -10000.]) # x and y coords
203 | high.extend([10000., 10000.])
204 |
205 | self.observation_space = Box(np.array(low), np.array(high))
206 |
207 | def _plot_observation(self, viewer, state, observation):
208 | pass
209 |
210 | def _get_observation(self, state):
211 | # return state
212 | dist_to_closest_obstacle, absolute_angle_to_closest_obstacle = self.world.range_and_bearing_to_closest_obstacle(state[0], state[1])
213 | obs = np.array([state[0], state[1], dist_to_closest_obstacle, absolute_angle_to_closest_obstacle])
214 | if self.add_goal_position_to_observation:
215 | obs = np.concatenate([obs, self.destination])
216 | return obs
217 |
--------------------------------------------------------------------------------
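
The action here is a (speed, heading) pair bounded by [0, max_speed] and [0, 2*pi]. A sketch of a naive goal-seeking policy on the registered state-based env (it ignores obstacles, so it can get stuck; old four-tuple step API assumed):

    import numpy as np
    from math import atan2, pi
    import gym
    import gym_extensions.continuous.gym_navigation_2d  # registers the env ids

    env = gym.make('State-Based-Navigation-2d-Map0-Goal0-v0')
    inner = env.unwrapped
    obs = env.reset()
    for _ in range(1000):
        # head straight for the goal at full speed, wrapping the bearing into [0, 2*pi)
        heading = atan2(inner.destination[1] - inner.state[1],
                        inner.destination[0] - inner.state[0]) % (2 * pi)
        obs, reward, done, info = env.step(np.array([inner.max_speed, heading]))
        if done:
            break
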
/gym_extensions/continuous/gym_navigation_2d/rrt.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | import sys
3 | import time
4 | import pickle
5 | import numpy as np
6 | import random
7 |
8 | from itertools import product
9 | from math import cos, sin, pi, sqrt
10 | import copy
11 |
12 | class State(object):
13 | def __init__(self, x, y, parent):
14 | self.x = x
15 | self.y = y
16 | self.parent = parent
17 | self.children = []
18 |
19 | def __hash__(self):
20 | return hash((self.x, self.y))
21 |
22 | def euclidean_distance(self, state):
23 | assert (state)
24 | return sqrt((state.x - self.x)**2 + (state.y - self.y)**2)
25 |
26 |
27 | class RRT(object):
28 | def __init__(self, world):
29 | self.world = world
30 |
31 | def state_is_free(self, state):
32 | return self.world.point_is_in_free_space(state.x, state.y, epsilon=0.2)
33 |
34 | def sample_state(self):
35 | x = random.uniform(self.world.x_range[0], self.world.x_range[1])
36 | y = random.uniform(self.world.y_range[0], self.world.y_range[1])
37 | return State(x, y, None)
38 |
39 |
40 | def _follow_parent_pointers(self, state):
41 | """
42 | Returns the path [start_state, ..., destination_state] by following the
43 | parent pointers.
44 | """
45 |
46 | curr_ptr = state
47 | path = []
48 |
49 | while curr_ptr is not None:
50 | path.append(curr_ptr)
51 | curr_ptr = curr_ptr.parent
52 |
53 | # return a reverse copy of the path (so that first state is starting state)
54 | return path[::-1]
55 |
56 |
57 | def find_closest_state(self, tree_nodes, state):
58 | min_dist = float("Inf")
59 | closest_state = None
60 | for node in tree_nodes:
61 | dist = node.euclidean_distance(state)
62 | if dist < min_dist:
63 | closest_state = node
64 | min_dist = dist
65 |
66 | return closest_state
67 |
68 | def steer_towards(self, s_nearest, s_rand, max_radius):
69 | """
70 | Returns a new state s_new whose coordinates x and y
71 | are decided as follows:
72 |
73 | If s_rand is within a circle of max_radius from s_nearest
74 | then s_new.x = s_rand.x and s_new.y = s_rand.y
75 |
76 | Otherwise, s_rand is farther than max_radius from s_nearest.
77 | In this case we place s_new on the line from s_nearest to
78 | s_rand, at a distance of max_radius away from s_nearest.
79 |
80 | """
81 |
82 | vx = float(s_rand.x - s_nearest.x)
83 | vy = float(s_rand.y - s_nearest.y)
84 | v = sqrt(vx**2 + vy**2)
85 |
86 | x = 0
87 | y = 0
88 |
89 | if v < max_radius:
90 | x = s_rand.x
91 | y = s_rand.y
92 | elif v > 1e-10:
93 | x = s_nearest.x + vx/v * max_radius
94 | y = s_nearest.y + vy/v * max_radius
95 |
96 | s_new = State(x, y, s_nearest)
97 | return s_new
98 |
99 |
100 | def segment_is_obstacle_free(self, s_from, s_to):
101 | """
102 | Returns true iff the line path from s_from to s_to
103 | is free
104 | """
105 | assert (self.state_is_free(s_from))
106 |
107 | if not (self.state_is_free(s_to)):
108 | return False
109 |
110 | return self.world.segment_is_in_free_space(s_from.x, s_from.y, s_to.x, s_to.y)
111 |
112 |
113 | def plan(self, start_state, dest_state, max_num_steps, max_steering_radius, dest_reached_radius):
114 | """
115 | Returns a path as a sequence of states [start_state, ..., dest_state]
116 | if dest_state is reachable from start_state. Otherwise returns [start_state].
117 | Assume both source and destination are in free space.
118 | """
119 | assert (self.state_is_free(start_state))
120 | assert (self.state_is_free(dest_state))
121 |
122 | # The set containing the nodes of the tree
123 | tree_nodes = set()
124 | tree_nodes.add(start_state)
125 |
126 | # image to be used to display the tree
127 | img = np.copy(self.world)
128 |
129 | plan = [start_state]
130 |
131 | for step in range(max_num_steps):
132 |
133 | s_rand = self.sample_state()
134 | s_nearest = self.find_closest_state(tree_nodes, s_rand)
135 | s_new = self.steer_towards(s_nearest, s_rand, max_steering_radius)
136 |
137 | if self.segment_is_obstacle_free(s_nearest, s_new):
138 | tree_nodes.add(s_new)
139 | s_nearest.children.append(s_new)
140 |
141 | # If we approach the destination within a few pixels
142 | # we're done. Return the path.
143 | if s_new.euclidean_distance(dest_state) < dest_reached_radius:
144 | dest_state.parent = s_new
145 | plan = self._follow_parent_pointers(dest_state)
146 | return plan, tree_nodes
147 |
148 | return [start_state], tree_nodes
149 |
150 | def smooth(self, path, alpha=1.5, rate=0.01, max_iterations=400, min_safe_distance=1.5):
151 | path = copy.deepcopy(path)
152 | old_path = np.array([ [state.x, state.y] for state in path])
153 | new_path = old_path.copy()
154 | it = 0
155 |
156 | while True:
157 |
158 | gradE = (new_path - old_path)
159 | for i in range(1, len(path)-1):
160 | gradE[i, :] -= alpha*(new_path[i+1, :] + new_path[i-1, :] - 2*new_path[i, :])
161 |
162 | gradE[0, :] = 0
163 | gradE[-1, :] = 0
164 |
165 | proposed_new_path = new_path - rate*gradE
166 |
167 | safe_idx = [self.world.segment_is_in_free_space(proposed_new_path[i-1][0],
168 | proposed_new_path[i-1][1],
169 | proposed_new_path[i][0],
170 | proposed_new_path[i][1]) and \
171 |
172 | self.world.segment_is_in_free_space(proposed_new_path[i][0],
173 | proposed_new_path[i][1],
174 | proposed_new_path[i+1][0],
175 | proposed_new_path[i+1][1]) \
176 |
177 | for i in range(1, proposed_new_path.shape[0]-1)]
178 |
179 |
180 | improving_idx = [self.world.segment_distance_from_obstacles(proposed_new_path[i][0],
181 | proposed_new_path[i][1],
182 | proposed_new_path[i+1][0],
183 | proposed_new_path[i+1][1]) >
184 |
185 | self.world.segment_distance_from_obstacles(old_path[i][0],
186 | old_path[i][1],
187 | old_path[i+1][0],
188 | old_path[i+1][1])
189 |
190 | for i in range(1, proposed_new_path.shape[0]-1)]
191 |
192 | safe_idx = np.array([False] + safe_idx + [False])
193 | improving_idx = np.array([False] + improving_idx + [False])
194 | safe_idx = safe_idx + improving_idx
195 |
196 | if not (any(safe_idx) and it < max_iterations):
197 | break
198 |
199 | new_path[safe_idx, :] = proposed_new_path[safe_idx, :]
200 | it += 1
201 |
202 | for i in range(1, len(path)-1):
203 | path[i].x = new_path[i, 0]
204 | path[i].y = new_path[i, 1]
205 |
206 | return path
207 |
--------------------------------------------------------------------------------
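
A sketch of running the planner on one of the shipped worlds (planner parameters are illustrative; the start matches the navigation envs' default initial position and the goal is the Map0/Goal0 destination):

    import os
    from gym_extensions.continuous import gym_navigation_2d as nav2d
    from gym_extensions.continuous.gym_navigation_2d.env_generator import EnvironmentCollection
    from gym_extensions.continuous.gym_navigation_2d.rrt import RRT, State

    worlds = EnvironmentCollection()
    worlds.read(os.path.join(os.path.dirname(nav2d.__file__), "assets", "worlds_640x480_v0.pkl"))

    planner = RRT(worlds.map_collection[0])
    start = State(-20.0, -20.0, None)
    goal = State(230.0, 430.0, None)
    path, tree = planner.plan(start, goal,
                              max_num_steps=5000,
                              max_steering_radius=30.0,
                              dest_reached_radius=20.0)
    print(len(path), "states in the returned path")
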
/gym_extensions/continuous/gym_navigation_2d/test_env_generator.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import numpy as np
3 | from env_generator import EnvironmentCollection, EnvironmentGenerator
4 | from env_utils import Environment, Obstacle
5 | from geometry_utils import *
6 |
7 | from math import sqrt
8 | import os
9 |
10 | class TestDistanceMethods(unittest.TestCase):
11 |
12 | def test_point_to_segment(self):
13 | p = np.array([0,0])
14 | a = np.array([1,1])
15 | b = np.array([2,2])
16 | self.assertEqual(point_to_segment_distance(p,a,b), sqrt(2))
17 |
18 |
19 | def test_point_to_segment2(self):
20 | p = np.array([3,3])
21 | a = np.array([1,1])
22 | b = np.array([2,2])
23 | self.assertEqual(point_to_segment_distance(p,a,b), sqrt(2))
24 |
25 |
26 | def test_point_to_segment3(self):
27 | p = np.array([1.5,1.5])
28 | a = np.array([1,1])
29 | b = np.array([2,2])
30 | self.assertEqual(point_to_segment_distance(p,a,b), 0.)
31 |
32 |
33 | def test_point_to_segment4(self):
34 | p = np.array([1,0])
35 | a = np.array([1,1])
36 | b = np.array([2,2])
37 | self.assertEqual(point_to_segment_distance(p,a,b), 1)
38 |
39 |
40 | def test_point_to_rectangle_distance(self):
41 | p = np.array([0, 0])
42 | c = np.array([1, 1])
43 | w = 1.0
44 | h = 1.0
45 | self.assertEqual(point_to_rectangle_distance(p, c, w, h), sqrt(2*(0.5)**2))
46 |
47 |
48 | def test_point_to_rectangle_distance2(self):
49 | p = np.array([1, 1])
50 | c = np.array([1, 1])
51 | w = 1.0
52 | h = 1.0
53 | self.assertEqual(point_to_rectangle_distance(p, c, w, h), 0)
54 |
55 | def test_point_to_rectangle_distance3(self):
56 | p = np.array([0.6, 0.6])
57 | c = np.array([1, 1])
58 | w = 1.0
59 | h = 1.0
60 | self.assertEqual(point_to_rectangle_distance(p, c, w, h), 0)
61 |
62 | p = np.array([1.4, 1.4])
63 | c = np.array([1, 1])
64 | w = 1.0
65 | h = 1.0
66 | self.assertEqual(point_to_rectangle_distance(p, c, w, h), 0)
67 |
68 |
69 | def test_rectangle_to_rectangle_distance(self):
70 | ca = np.array([1, 1])
71 | wa = 1.0
72 | ha = 1.0
73 |
74 | cb = np.array([0, 0])
75 | wb = 1.0
76 | hb = 1.0
77 |
78 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 0)
79 |
80 |
81 | def test_rectangle_to_rectangle_distance2(self):
82 | ca = np.array([0, 2])
83 | wa = 2.0
84 | ha = 2.0
85 |
86 | cb = np.array([0, 0])
87 | wb = 1.0
88 | hb = 1.0
89 |
90 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 0.5)
91 |
92 |
93 | def test_rectangle_to_rectangle_distance3(self):
94 | ca = np.array([0, 2])
95 | wa = 2.0
96 | ha = 2.0
97 |
98 | cb = np.array([0, 0])
99 | wb = 1.0
100 | hb = 1.0
101 |
102 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 0.5)
103 |
104 |
105 | def test_rectangle_to_rectangle_distance4(self):
106 | ca = np.array([0, 0])
107 | wa = 2.0
108 | ha = 2.0
109 |
110 | cb = np.array([0, 0])
111 | wb = 1.0
112 | hb = 1.0
113 |
114 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 0.)
115 |
116 |
117 | def test_rectangle_to_rectangle_distance5(self):
118 | ca = np.array([-1, 0])
119 | wa = 1.0
120 | ha = 1.0
121 |
122 | cb = np.array([0, 0])
123 | wb = 1.0
124 | hb = 1.0
125 |
126 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 0.)
127 |
128 |
129 | def test_rectangle_to_rectangle_distance6(self):
130 | ca = np.array([-2, 0])
131 | wa = 1.0
132 | ha = 1.0
133 |
134 | cb = np.array([0, 0])
135 | wb = 1.0
136 | hb = 1.0
137 |
138 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 1.)
139 |
140 |
141 | def test_rectangle_to_rectangle_distance7(self):
142 | ca = np.array([-2, -2])
143 | wa = 1.0
144 | ha = 1.0
145 |
146 | cb = np.array([0, 0])
147 | wb = 1.0
148 | hb = 1.0
149 |
150 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), sqrt(8) - 2*sqrt(2*0.5**2))
151 |
152 | def test_rectangle_to_rectangle_distance8(self):
153 | ca = np.array([45.41472479, 42.0288299])
154 | wa = 1.63125936059
155 | ha = 6.08773739678
156 |
157 | cb = np.array([45.70077727, 41.39116316])
158 | wb = 6.98818845647
159 | hb = 3.83908874358
160 |
161 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 0)
162 |
163 |
164 | def test_rectangle_to_object_distance(self):
165 | ca = np.array([-2, -2])
166 | wa = 1.0
167 | ha = 1.0
168 |
169 | obs = Obstacle(ca,wa,ha)
170 |
171 | cb = np.array([0, 0])
172 | wb = 1.0
173 | hb = 1.0
174 |
175 | self.assertEqual(obs.distance_to_rectangle(cb, wb, hb), sqrt(8) - 2*sqrt(2*0.5**2))
176 |
177 |
178 | def test_rectangle_to_object_distance2(self):
179 | ca = np.array([-2, -2])
180 | wa = 1.0
181 | ha = 1.0
182 | obs = Obstacle(ca,wa,ha)
183 |
184 | obs.append(np.array([2,2]), 1, 1)
185 |
186 | cb = np.array([0, 0])
187 | wb = 1.0
188 | hb = 1.0
189 |
190 | self.assertEqual(obs.distance_to_rectangle(cb, wb, hb), sqrt(8) - 2*sqrt(2*0.5**2))
191 |
192 |
193 | def test_rectangle_to_object_distance3(self):
194 | ca = np.array([-2, -2])
195 | wa = 1.0
196 | ha = 1.0
197 | obs = Obstacle(ca,wa,ha)
198 |
199 | obs.append(np.array([2,2]), 1, 1)
200 | obs.append(np.array([1,1]), 1, 1)
201 |
202 | cb = np.array([0, 0])
203 | wb = 1.0
204 | hb = 1.0
205 |
206 | self.assertEqual(obs.distance_to_rectangle(cb, wb, hb), 0)
207 |
208 |
209 | def test_object_to_object_distance(self):
210 | ca = np.array([-2, -2])
211 | wa = 1.0
212 | ha = 1.0
213 | obs = Obstacle(ca,wa,ha)
214 | obs.append(np.array([2,2]), 1, 1)
215 | obs.append(np.array([1,1]), 1, 1)
216 |
217 | cb = np.array([0, 0])
218 | wb = 1.0
219 | hb = 1.0
220 | obs2 = Obstacle(cb,wb,hb)
221 |
222 | self.assertEqual(obs.distance_to_obstacle(obs2), 0)
223 |
224 |
225 | def test_object_to_object_distance2(self):
226 | ca = np.array([-2, -2])
227 | wa = 1.0
228 | ha = 1.0
229 | obs = Obstacle(ca,wa,ha)
230 | obs.append(np.array([2,2]), 1, 1)
231 | obs.append(np.array([1,1]), 1, 1)
232 |
233 | cb = np.array([4, 2])
234 | wb = 1.0
235 | hb = 1.0
236 | obs2 = Obstacle(cb,wb,hb)
237 | obs2.append(np.array([1,1]), 1, 1)
238 |
239 | self.assertEqual(obs.distance_to_obstacle(obs2), 0)
240 |
241 |
242 | def test_object_to_object_distance3(self):
243 | ca = np.array([-2, -2])
244 | wa = 1.0
245 | ha = 1.0
246 | obs = Obstacle(ca,wa,ha)
247 | obs.append(np.array([2,2]), 1, 1)
248 | obs.append(np.array([1,1]), 1, 1)
249 |
250 | cb = np.array([4, 2])
251 | wb = 1.0
252 | hb = 1.0
253 | obs2 = Obstacle(cb,wb,hb)
254 | obs2.append(np.array([10,10]), 1, 1)
255 | obs2.append(np.array([0,2.1]), 1, 1)
256 |
257 | self.assertAlmostEqual(obs.distance_to_obstacle(obs2), 0.1)
258 |
259 |
260 | def test_merge_rectangles(self):
261 | centers = np.array([[-2,-2], [2,2], [0,0], [10,10], [11,11]])
262 | widths = np.array([2,2,2,1,1]).reshape((len(centers),1))
263 | heights = np.array([2,2,2,1,1]).reshape((len(centers),1))
264 |
265 |
266 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5])
267 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2)
268 |
269 | self.assertEqual(len(obstacles), 2)
270 |
271 |
272 | def test_merge_rectangles2(self):
273 | centers = np.array([[-2,-2], [2,2], [0,0], [11,11], [10,10]])
274 | widths = np.array([2,2,2,1,1]).reshape((len(centers),1))
275 | heights = np.array([2,2,2,1,1]).reshape((len(centers),1))
276 |
277 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5])
278 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2)
279 |
280 | self.assertEqual(len(obstacles), 2)
281 |
282 |
283 | def test_merge_rectangles3(self):
284 | centers = np.array([[-2,-2], [2,2], [0,0], [10,10], [12,12], [11,11]])
285 | widths = np.array([2,2,2,1,1,1]).reshape((len(centers),1))
286 | heights = np.array([2,2,2,1,1,1]).reshape((len(centers),1))
287 |
288 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5])
289 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2)
290 |
291 | self.assertEqual(len(obstacles), 2)
292 |
293 | def test_merge_rectangles4(self):
294 | centers = np.array([[-2,-2], [2,2], [0,0], [10,10], [13,13], [11,11], [12,12]])
295 | widths = np.array([2,2,2,1,1,1,1]).reshape((len(centers),1))
296 | heights = np.array([2,2,2,1,1,1,1]).reshape((len(centers),1))
297 |
298 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5])
299 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2)
300 |
301 | self.assertEqual(len(obstacles), 2)
302 |
303 |
304 | def test_merge_rectangles5(self):
305 | centers = np.array([[-2,-2], [2,2], [0,0], [10,10], [13,13], [12,12]])
306 | widths = np.array([2,2,2,1,1,1]).reshape((len(centers),1))
307 | heights = np.array([2,2,2,1,1,1]).reshape((len(centers),1))
308 |
309 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5])
310 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2)
311 |
312 | self.assertEqual(len(obstacles), 3)
313 |
314 |
315 | def test_point_in_free_space(self):
316 | centers = np.array([[0,0]])
317 | widths = np.array([10]).reshape((len(centers),1))
318 | heights = np.array([10]).reshape((len(centers),1))
319 |
320 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5])
321 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2)
322 |
323 | env = Environment(eg.x_range, eg.y_range, list(obstacles.values()))
324 |
325 | self.assertTrue(env.point_is_in_free_space(6,6))
326 | self.assertTrue(env.point_is_in_free_space(5,5))
327 |
328 | self.assertFalse(env.point_is_in_free_space(4,4))
329 | self.assertFalse(env.point_is_in_free_space(0.5,0.5))
330 |
331 | self.assertTrue(env.point_is_in_free_space(-4,-4))
332 | self.assertTrue(env.point_is_in_free_space(-5,-5))
333 | self.assertTrue(env.point_is_in_free_space(-6,-6))
334 |
335 |
336 | def test_segment_in_free_space(self):
337 | centers = np.array([[0,0]])
338 | widths = np.array([10]).reshape((len(centers),1))
339 | heights = np.array([10]).reshape((len(centers),1))
340 |
341 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5])
342 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2)
343 |
344 | env = Environment(eg.x_range, eg.y_range, list(obstacles.values()))
345 |
346 | self.assertFalse(env.segment_is_in_free_space(-5, 1, 5, 1))
347 | self.assertTrue(env.segment_is_in_free_space(-5,-4, 5, -4))
348 |
349 |
350 | def test_save_read(self):
351 | centers = np.array([[4,4], [10,10]])
352 | widths = np.array([1,1]).reshape((len(centers),1))
353 | heights = np.array([3,3]).reshape((len(centers),1))
354 |
355 | eg = EnvironmentGenerator([0, 100], [0, 100], [1,5], [1, 5])
356 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2)
357 |
358 | env = Environment(eg.x_range, eg.y_range, list(obstacles.values()))
359 |
360 | centers2 = np.array([[5,5], [10,10]])
361 | widths2 = np.array([1,1]).reshape((len(centers2),1))
362 | heights2 = np.array([2,2]).reshape((len(centers2),1))
363 |
364 | eg2 = EnvironmentGenerator([0, 100], [0, 100], [1,5], [1, 5])
365 | obstacles2 = eg2.merge_rectangles_into_obstacles(centers2, widths2, heights2, epsilon=0.2)
366 |
367 | env2 = Environment(eg2.x_range, eg2.y_range, list(obstacles2.values()))
368 |
369 | ec = EnvironmentCollection()
370 | ec.x_range = [0, 100]
371 | ec.y_range = [0, 100]
372 | ec.width_range = [1,5]
373 | ec.height_range = [1,5]
374 | ec.num_environments = 0
375 | ec.map_collection = {0: env, 1: env2}
376 |
377 | ec.save('./temp.pkl')
378 |
379 | ec2 = EnvironmentCollection()
380 | ec2.read('./temp.pkl')
381 |
382 | self.assertEqual(ec2.x_range, ec.x_range)
383 | self.assertEqual(ec2.y_range, ec.y_range)
384 | self.assertEqual(ec2.map_collection, ec.map_collection)
385 |
386 | os.remove('./temp.pkl')
387 |
388 |
389 | if __name__ == '__main__':
390 | unittest.main()
391 |
--------------------------------------------------------------------------------
/gym_extensions/continuous/mujoco/__init__.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import os
3 | import gym.envs.mujoco
4 |
5 | custom_envs = {
6 | # Pusher modifications
7 | "PusherMovingGoal-v0":
8 | dict(path='gym_extensions.continuous.mujoco.modified_arm:PusherMovingGoalEnv',
9 | max_episode_steps=100,
10 | reward_threshold=0.0,
11 | kwargs= dict()),
12 | # Pusher modifications
13 | "PusherLeftSide-v0":
14 | dict(path='gym_extensions.continuous.mujoco.modified_arm:PusherLeftSide',
15 | max_episode_steps=100,
16 | reward_threshold=0.0,
17 | kwargs= dict()),
18 | "PusherFullRange-v0":
19 | dict(path='gym_extensions.continuous.mujoco.modified_arm:PusherFullRange',
20 | max_episode_steps=100,
21 | reward_threshold=0.0,
22 | kwargs= dict()),
23 | # Striker
24 | "StrikerMovingStart-v0":
25 | dict(path='gym_extensions.continuous.mujoco.modified_arm:StrikerMovingStartStateEnv',
26 | max_episode_steps=100,
27 | reward_threshold=0.0,
28 | kwargs= dict()),
29 |
30 |     # modified gravity environments
31 | "AntGravityMars-v0" :
32 | dict(path='gym_extensions.continuous.mujoco.modified_ant:AntGravityEnv',
33 | max_episode_steps=1000,
34 | reward_threshold=3800.0,
35 | kwargs= dict(gravity=-3.711)),
36 | "AntGravityHalf-v0" :
37 | dict(path='gym_extensions.continuous.mujoco.modified_ant:AntGravityEnv',
38 | max_episode_steps=1000,
39 | reward_threshold=3800.0,
40 | kwargs= dict(gravity=-4.905)),
41 | "AntGravityOneAndHalf-v0" :
42 | dict(path='gym_extensions.continuous.mujoco.modified_ant:AntGravityEnv',
43 | max_episode_steps=1000,
44 | reward_threshold=3800.0,
45 | kwargs= dict(gravity=-14.715)),
46 |
47 | "HopperGravityHalf-v0" :
48 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperGravityEnv',
49 | max_episode_steps=1000,
50 | reward_threshold=3800.0,
51 | kwargs= dict(gravity=-4.905)),
52 | "HopperGravityThreeQuarters-v0" :
53 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperGravityEnv',
54 | max_episode_steps=1000,
55 | reward_threshold=3800.0,
56 | kwargs= dict(gravity=-7.3575)),
57 | "HopperGravityOneAndHalf-v0" :
58 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperGravityEnv',
59 | max_episode_steps=1000,
60 | reward_threshold=3800.0,
61 | kwargs= dict(gravity=-14.715)),
62 | "HopperGravityOneAndQuarter-v0" :
63 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperGravityEnv',
64 | max_episode_steps=1000,
65 | reward_threshold=3800.0,
66 | kwargs= dict(gravity=-12.2625)),
67 |
68 | "Walker2dGravityHalf-v0" :
69 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dGravityEnv',
70 | max_episode_steps=1000,
71 | kwargs= dict(gravity=-4.905)),
72 | "Walker2dGravityThreeQuarters-v0" :
73 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dGravityEnv',
74 | max_episode_steps=1000,
75 | kwargs= dict(gravity=-7.3575)),
76 | "Walker2dGravityOneAndHalf-v0" :
77 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dGravityEnv',
78 | max_episode_steps=1000,
79 | kwargs= dict(gravity=-14.715)),
80 | "Walker2dGravityOneAndQuarter-v0" :
81 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dGravityEnv',
82 | max_episode_steps=1000,
83 | kwargs= dict(gravity=-12.2625)),
84 |
85 | "HalfCheetahGravityHalf-v0" :
86 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahGravityEnv',
87 | max_episode_steps=1000,
88 | reward_threshold=4800.0,
89 | kwargs= dict(gravity=-4.905)),
90 | "HalfCheetahGravityThreeQuarters-v0" :
91 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahGravityEnv',
92 | max_episode_steps=1000,
93 | reward_threshold=4800.0,
94 | kwargs= dict(gravity=-7.3575)),
95 | "HalfCheetahGravityOneAndHalf-v0" :
96 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahGravityEnv',
97 | max_episode_steps=1000,
98 | reward_threshold=4800.0,
99 | kwargs= dict(gravity=-14.715)),
100 | "HalfCheetahGravityOneAndQuarter-v0" :
101 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahGravityEnv',
102 | max_episode_steps=1000,
103 | reward_threshold=4800.0,
104 | kwargs= dict(gravity=-12.2625)),
105 |
106 | "HumanoidGravityHalf-v0" :
107 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidGravityEnv',
108 | max_episode_steps=1000,
109 | kwargs= dict(gravity=-4.905)),
110 | "HumanoidGravityThreeQuarters-v0" :
111 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidGravityEnv',
112 | max_episode_steps=1000,
113 | kwargs= dict(gravity=-7.3575)),
114 | "HumanoidGravityOneAndHalf-v0" :
115 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidGravityEnv',
116 | max_episode_steps=1000,
117 | kwargs= dict(gravity=-14.715)),
118 | "HumanoidGravityOneAndQuarter-v0" :
119 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidGravityEnv',
120 | max_episode_steps=1000,
121 | kwargs= dict(gravity=-12.2625)),
122 |
123 |
124 |
125 |     ### Environments with walls
126 | "AntMaze-v0" :
127 | dict(path='gym_extensions.continuous.mujoco.modified_ant:AntMaze',
128 | max_episode_steps=1000,
129 | reward_threshold=3800.0,
130 | kwargs= dict()),
131 | "HopperStairs-v0" :
132 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperStairs',
133 | max_episode_steps=1000,
134 | reward_threshold=3800.0,
135 | kwargs= dict()),
136 | "HopperSimpleWall-v0" :
137 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperSimpleWallEnv',
138 | max_episode_steps=1000,
139 | reward_threshold=3800.0,
140 | kwargs= dict()),
141 |
142 | "HopperWithSensor-v0" :
143 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperWithSensorEnv',
144 | max_episode_steps=1000,
145 | reward_threshold=3800.0,
146 | kwargs= dict(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/hopper.xml")),
147 | "Walker2dWall-v0" :
148 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dWallEnv',
149 | max_episode_steps=1000,
150 | kwargs= dict()),
151 | "Walker2dWithSensor-v0" :
152 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dWithSensorEnv',
153 | max_episode_steps=1000,
154 | kwargs= dict(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/walker2d.xml")),
155 | "HalfCheetahWall-v0" :
156 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahWallEnv',
157 | max_episode_steps=1000,
158 | reward_threshold=4800.0,
159 | kwargs= dict()),
160 | "HalfCheetahWithSensor-v0" :
161 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahWithSensorEnv',
162 | max_episode_steps=1000,
163 | reward_threshold=4800.0,
164 | kwargs= dict(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/half_cheetah.xml")),
165 | "HumanoidWall-v0" :
166 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidWallEnv',
167 | max_episode_steps=1000,
168 | kwargs= dict()),
169 | "HumanoidWithSensor-v0" :
170 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidWithSensorEnv',
171 | max_episode_steps=1000,
172 | kwargs= dict(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoid.xml")),
173 | "HumanoidStandupWithSensor-v0" :
174 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidStandupWithSensorEnv',
175 | max_episode_steps=1000,
176 | kwargs= dict(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoidstandup.xml")),
177 | "HumanoidStandupAndRunWall-v0" :
178 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidStandupAndRunWallEnv',
179 | max_episode_steps=1000,
180 | kwargs= dict()),
181 | "HumanoidStandupAndRunWithSensor-v0" :
182 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidStandupAndRunEnvWithSensor',
183 | max_episode_steps=1000,
184 | kwargs= dict(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoidstandup.xml")),
185 | "HumanoidStandupAndRun-v0" :
186 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidStandupAndRunEnv',
187 | max_episode_steps=1000,
188 | kwargs= dict()),
189 |
190 | # Modified body parts - Hopper
191 | "HopperBigTorso-v0" :
192 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv',
193 | max_episode_steps=1000,
194 | reward_threshold=3800.0,
195 | kwargs= dict(body_parts=["torso_geom"], size_scale=1.25)),
196 | "HopperBigThigh-v0" :
197 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv',
198 | max_episode_steps=1000,
199 | reward_threshold=3800.0,
200 | kwargs= dict(body_parts=["thigh_geom"], size_scale=1.25)),
201 | "HopperBigLeg-v0" :
202 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv',
203 | max_episode_steps=1000,
204 | reward_threshold=3800.0,
205 | kwargs= dict(body_parts=["leg_geom"], size_scale=1.25)),
206 | "HopperBigFoot-v0" :
207 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv',
208 | max_episode_steps=1000,
209 | reward_threshold=3800.0,
210 | kwargs= dict(body_parts=["foot_geom"], size_scale=1.25)),
211 | "HopperSmallTorso-v0" :
212 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv',
213 | max_episode_steps=1000,
214 | reward_threshold=3800.0,
215 | kwargs= dict(body_parts=["torso_geom"], size_scale=.75)),
216 | "HopperSmallThigh-v0" :
217 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv',
218 | max_episode_steps=1000,
219 | reward_threshold=3800.0,
220 | kwargs= dict(body_parts=["thigh_geom"], size_scale=.75)),
221 | "HopperSmallLeg-v0" :
222 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv',
223 | max_episode_steps=1000,
224 | reward_threshold=3800.0,
225 | kwargs= dict(body_parts=["leg_geom"], size_scale=.75)),
226 | "HopperSmallFoot-v0" :
227 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv',
228 | max_episode_steps=1000,
229 | reward_threshold=3800.0,
230 | kwargs= dict(body_parts=["foot_geom"], size_scale=.75)),
231 |
232 | # Modified body parts - Walker
233 | "Walker2dBigTorso-v0" :
234 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv',
235 | max_episode_steps=1000,
236 | kwargs= dict(body_parts=["torso_geom"], size_scale=1.25)),
237 | "Walker2dBigThigh-v0" :
238 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv',
239 | max_episode_steps=1000,
240 | kwargs= dict(body_parts=["thigh_geom", "thigh_left_geom"], size_scale=1.25)),
241 | "Walker2dBigLeg-v0" :
242 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv',
243 | max_episode_steps=1000,
244 | kwargs= dict(body_parts=["leg_geom", "leg_left_geom"], size_scale=1.25)),
245 | "Walker2dBigFoot-v0" :
246 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv',
247 | max_episode_steps=1000,
248 | kwargs= dict(body_parts=["foot_geom", "foot_left_geom"], size_scale=1.25)),
249 | "Walker2dSmallTorso-v0" :
250 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv',
251 | max_episode_steps=1000,
252 | kwargs= dict(body_parts=["torso_geom"], size_scale=.75)),
253 | "Walker2dSmallThigh-v0" :
254 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv',
255 | max_episode_steps=1000,
256 | kwargs= dict(body_parts=["thigh_geom", "thigh_left_geom"], size_scale=.75)),
257 | "Walker2dSmallLeg-v0" :
258 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv',
259 | max_episode_steps=1000,
260 | kwargs= dict(body_parts=["leg_geom", "leg_left_geom"], size_scale=.75)),
261 | "Walker2dSmallFoot-v0" :
262 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv',
263 | max_episode_steps=1000,
264 | kwargs= dict(body_parts=["foot_geom", "foot_left_geom"], size_scale=.75)),
265 |
266 | # Modified body parts - HalfCheetah
267 | "HalfCheetahBigTorso-v0" :
268 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv',
269 | max_episode_steps=1000,
270 | reward_threshold=4800.0,
271 | kwargs= dict(body_parts=["torso"], size_scale=1.25)),
272 | "HalfCheetahBigThigh-v0" :
273 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv',
274 | max_episode_steps=1000,
275 | reward_threshold=4800.0,
276 | kwargs= dict(body_parts=["fthigh", "bthigh"], size_scale=1.25)),
277 | "HalfCheetahBigLeg-v0" :
278 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv',
279 | max_episode_steps=1000,
280 | reward_threshold=4800.0,
281 | kwargs= dict(body_parts=["fshin", "bshin"], size_scale=1.25)),
282 | "HalfCheetahBigFoot-v0" :
283 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv',
284 | max_episode_steps=1000,
285 | reward_threshold=4800.0,
286 | kwargs= dict(body_parts=["ffoot", "bfoot"], size_scale=1.25)),
287 | "HalfCheetahSmallTorso-v0" :
288 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv',
289 | max_episode_steps=1000,
290 | reward_threshold=4800.0,
291 | kwargs= dict(body_parts=["torso"], size_scale=.75)),
292 | "HalfCheetahSmallThigh-v0" :
293 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv',
294 | max_episode_steps=1000,
295 | reward_threshold=4800.0,
296 | kwargs= dict(body_parts=["fthigh", "bthigh"], size_scale=.75)),
297 | "HalfCheetahSmallLeg-v0" :
298 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv',
299 | max_episode_steps=1000,
300 | reward_threshold=4800.0,
301 | kwargs= dict(body_parts=["fshin", "bshin"], size_scale=.75)),
302 | "HalfCheetahSmallFoot-v0" :
303 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv',
304 | max_episode_steps=1000,
305 | reward_threshold=4800.0,
306 | kwargs= dict(body_parts=["ffoot", "bfoot"], size_scale=.75)),
307 | "HalfCheetahSmallHead-v0" :
308 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv',
309 | max_episode_steps=1000,
310 | reward_threshold=4800.0,
311 | kwargs= dict(body_parts=["head"], size_scale=.75)),
312 | "HalfCheetahBigHead-v0" :
313 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv',
314 | max_episode_steps=1000,
315 | reward_threshold=4800.0,
316 | kwargs= dict(body_parts=["head"], size_scale=1.25)),
317 |
318 |
319 | # Modified body parts - Humanoid
320 | "HumanoidBigTorso-v0" :
321 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
322 | max_episode_steps=1000,
323 | kwargs= dict(body_parts=["torso1", "uwaist", "lwaist"], size_scale=1.25)),
324 | "HumanoidBigThigh-v0" :
325 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
326 | max_episode_steps=1000,
327 | kwargs= dict(body_parts=["right_thigh1", "left_thigh1", "butt"], size_scale=1.25)),
328 | "HumanoidBigLeg-v0" :
329 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
330 | max_episode_steps=1000,
331 | kwargs= dict(body_parts=["right_shin1", "left_shin1"], size_scale=1.25)),
332 | "HumanoidBigFoot-v0" :
333 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
334 | max_episode_steps=1000,
335 | kwargs= dict(body_parts=["left_foot", "right_foot"], size_scale=1.25)),
336 | "HumanoidSmallTorso-v0" :
337 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
338 | max_episode_steps=1000,
339 | kwargs= dict(body_parts=["torso1", "uwaist", "lwaist"], size_scale=.75)),
340 | "HumanoidSmallThigh-v0" :
341 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
342 | max_episode_steps=1000,
343 | kwargs= dict(body_parts=["right_thigh1", "left_thigh1", "butt"], size_scale=.75)),
344 | "HumanoidSmallLeg-v0" :
345 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
346 | max_episode_steps=1000,
347 | kwargs= dict(body_parts=["right_shin1", "left_shin1"], size_scale=.75)),
348 | "HumanoidSmallFoot-v0" :
349 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
350 | max_episode_steps=1000,
351 | kwargs= dict(body_parts=["left_foot", "right_foot"], size_scale=.75)),
352 | "HumanoidSmallHead-v0" :
353 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
354 | max_episode_steps=1000,
355 | kwargs= dict(body_parts=["head"], size_scale=.75)),
356 | "HumanoidBigHead-v0" :
357 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
358 | max_episode_steps=1000,
359 | kwargs= dict(body_parts=["head"], size_scale=1.25)),
360 | "HumanoidSmallArm-v0" :
361 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
362 | max_episode_steps=1000,
363 | kwargs= dict(body_parts=["right_uarm1", "right_larm", "left_uarm1", "left_larm"], size_scale=.75)),
364 | "HumanoidBigArm-v0" :
365 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
366 | max_episode_steps=1000,
367 | kwargs= dict(body_parts=["right_uarm1", "right_larm", "left_uarm1", "left_larm"], size_scale=1.25)),
368 | "HumanoidSmallHand-v0" :
369 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
370 | max_episode_steps=1000,
371 | kwargs= dict(body_parts=["left_hand", "right_hand"], size_scale=.75)),
372 | "HumanoidBigHand-v0" :
373 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv',
374 | max_episode_steps=1000,
375 | kwargs= dict(body_parts=["left_hand", "right_hand"], size_scale=1.25)),
376 | }
377 |
378 | def register_custom_envs():
379 | for key, value in custom_envs.items():
380 | arg_dict = dict(id=key,
381 | entry_point=value["path"],
382 | max_episode_steps=value["max_episode_steps"],
383 | kwargs=value["kwargs"])
384 |
385 | if "reward_threshold" in value:
386 | arg_dict["reward_threshold"] = value["reward_threshold"]
387 |
388 | gym.envs.register(**arg_dict)
389 |
390 | register_custom_envs()
391 |
--------------------------------------------------------------------------------
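Registration of the environments above happens as a side effect of importing the package. A minimal usage sketch (not part of the repository; assumes gym and mujoco-py are installed):

import gym
import gym_extensions.continuous.mujoco  # importing runs register_custom_envs()

env = gym.make("HopperGravityHalf-v0")
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())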
/gym_extensions/continuous/mujoco/gravity_envs.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import tempfile
3 | import xml.etree.ElementTree as ET
4 | import math
5 |
6 | import numpy as np
7 | import gym
8 | import random
9 | import os
10 | from gym import utils
11 | from gym.envs.mujoco import mujoco_env
12 | import mujoco_py
13 |
14 | def GravityEnvFactory(class_type):
15 | """class_type should be an OpenAI gym type"""
16 |
17 | class GravityEnv(class_type, utils.EzPickle):
18 | """
19 |         Allows the gravity of the MuJoCo model to be changed via the gravity kwarg.
20 | """
21 | def __init__(
22 | self,
23 | model_path,
24 | gravity=-9.81,
25 | *args,
26 | **kwargs):
27 | class_type.__init__(self, model_path=model_path)
28 | utils.EzPickle.__init__(self)
29 |
30 | # make sure we're using a proper OpenAI gym Mujoco Env
31 | assert isinstance(self, mujoco_env.MujocoEnv)
32 |
33 | self.model.opt.gravity = (mujoco_py.mjtypes.c_double * 3)(*[0., 0., gravity])
34 | self.model._compute_subtree()
35 | self.model.forward()
36 |
37 | return GravityEnv
38 |
--------------------------------------------------------------------------------
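The factory above wraps an existing MuJoCo env class and overrides its gravity vector at construction time. A hedged sketch of using it directly, mirroring how the registered gravity variants are built (ModifiedHopperEnv comes from modified_hopper.py later in this package):

import os
import gym.envs.mujoco
from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory
from gym_extensions.continuous.mujoco.modified_hopper import ModifiedHopperEnv

hopper_xml = os.path.dirname(gym.envs.mujoco.__file__) + "/assets/hopper.xml"
MarsHopper = GravityEnvFactory(ModifiedHopperEnv)          # class with configurable gravity
env = MarsHopper(model_path=hopper_xml, gravity=-3.711)    # Mars gravity, as in AntGravityMars-v0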
/gym_extensions/continuous/mujoco/modified_ant.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 | import os.path as osp
5 | from gym_extensions.continuous.mujoco.wall_envs import MazeFactory
6 | from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory
7 | from gym_extensions.continuous.mujoco.perturbed_bodypart_env import ModifiedSizeEnvFactory
8 |
9 | from gym.envs.mujoco.ant import AntEnv
10 |
11 | import os
12 | import gym
13 |
14 |
15 | AntGravityEnv = lambda *args, **kwargs : GravityEnvFactory(ModifiedAntEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml", *args, **kwargs)
16 |
17 |
18 | AntMaze = lambda *args, **kwargs : MazeFactory(ModifiedAntEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml", ori_ind=0, *args, **kwargs)
19 |
20 |
21 |
22 |
23 | class ModifiedAntEnv(AntEnv, utils.EzPickle):
24 | """
25 |     Simply allows changing of the XML file; probably not necessary if the xml path gets accepted upstream as a kwarg in OpenAI gym.
26 | """
27 | def __init__(self, **kwargs):
28 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 4)
29 | utils.EzPickle.__init__(self)
30 |
--------------------------------------------------------------------------------
/gym_extensions/continuous/mujoco/modified_arm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 | import os.path as osp
5 | from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory
6 | from gym.envs.mujoco.pusher import PusherEnv
7 | from gym.envs.mujoco.striker import StrikerEnv
8 |
9 | import os
10 | import gym
11 | import random
12 | import six
13 |
14 | class PusherFullRange(PusherEnv, utils.EzPickle):
15 | """
16 |     Pusher variant that widens the shoulder-pan joint range and, on each reset, randomly places the goal on either the left or the right side.
17 | """
18 | def __init__(self, model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/pusher.xml", **kwargs):
19 | mujoco_env.MujocoEnv.__init__(self, model_path, 5)
20 | utils.EzPickle.__init__(self)
21 |
22 | # make sure we're using a proper OpenAI gym Mujoco Env
23 | assert isinstance(self, mujoco_env.MujocoEnv)
24 |
25 | self.model.jnt_range = self.get_and_modify_joint_range('r_shoulder_pan_joint')
26 | self.model._compute_subtree()
27 | self.model.forward()
28 |
29 | def get_and_modify_joint_range(self, body_name, new_array=np.array([-.854, 2.714602])):
30 | idx = self.model.joint_names.index(six.b(body_name))
31 | temp = np.copy(self.model.jnt_range)
32 | temp[idx] = new_array
33 | return temp
34 |
35 | def reset_model(self):
36 | qpos = self.init_qpos
37 |
38 | goal_pos_map = { 'left' : np.array([-0.05, -1.1230]), 'right' : np.array([0.0, 0.0])}
39 | object_start_pos_map = {'left' : np.array([[-0.3, 0.0], [-1.1, -.8]]), 'right' : np.array([[-0.3, 0.0], [-0.2, 0.2]]) }
40 | position = random.choice(['left', 'right'])
41 |
42 | self.goal_pos = goal_pos_map[position]
43 | object_start_range = object_start_pos_map[position]
44 |
45 | while True:
46 |
47 | self.cylinder_pos = np.concatenate([
48 | self.np_random.uniform(low=object_start_range[0][0], high=object_start_range[0][1], size=1),
49 | self.np_random.uniform(low=object_start_range[1][0], high=object_start_range[1][1], size=1)])
50 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
51 | break
52 |
53 | qpos[-4:-2] = self.cylinder_pos
54 | qpos[-2:] = self.goal_pos
55 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
56 | high=0.005, size=self.model.nv)
57 | qvel[-4:] = 0
58 | self.set_state(qpos, qvel)
59 | return self._get_obs()
60 |
61 |
62 | class PusherLeftSide(PusherEnv, utils.EzPickle):
63 | """
64 |     Pusher variant that widens the shoulder-pan joint range and places the goal on the left side of the workspace.
65 | """
66 | def __init__(self, model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/pusher.xml", **kwargs):
67 | mujoco_env.MujocoEnv.__init__(self, model_path, 5)
68 | utils.EzPickle.__init__(self)
69 |
70 | # make sure we're using a proper OpenAI gym Mujoco Env
71 | assert isinstance(self, mujoco_env.MujocoEnv)
72 |
73 | self.model.jnt_range = self.get_and_modify_joint_range('r_shoulder_pan_joint')
74 | self.model._compute_subtree()
75 | self.model.forward()
76 |
77 | def get_and_modify_joint_range(self, body_name, new_array=np.array([-.854, 2.714602])):
78 | idx = self.model.joint_names.index(six.b(body_name))
79 | temp = np.copy(self.model.jnt_range)
80 | temp[idx] = new_array
81 | return temp
82 |
83 | def reset_model(self):
84 | qpos = self.init_qpos
85 | self.goal_pos = np.array([-0.05, -1.1230])
86 |
87 | while True:
88 | self.cylinder_pos = np.concatenate([
89 | self.np_random.uniform(low=-0.3, high=0, size=1),
90 | self.np_random.uniform(low=-1.1, high=-.8, size=1)])
91 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
92 | break
93 |
94 | qpos[-4:-2] = self.cylinder_pos
95 | qpos[-2:] = self.goal_pos
96 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
97 | high=0.005, size=self.model.nv)
98 | qvel[-4:] = 0
99 | self.set_state(qpos, qvel)
100 | return self._get_obs()
101 |
102 | class PusherMovingGoalEnv(PusherEnv, utils.EzPickle):
103 | """
104 |     Pusher variant that samples a new goal position on every reset instead of using a fixed goal.
105 | """
106 | def __init__(self, model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/pusher.xml", **kwargs):
107 | mujoco_env.MujocoEnv.__init__(self, model_path, 5)
108 | utils.EzPickle.__init__(self)
109 |
110 | def reset_model(self):
111 | qpos = self.init_qpos
112 |
113 | self.goal_pos = np.concatenate([
114 | self.np_random.uniform(low=-0.2, high=0, size=1),
115 | self.np_random.uniform(low=-0.1, high=0.3, size=1)])
116 |
117 | while True:
118 | self.cylinder_pos = np.concatenate([
119 | self.np_random.uniform(low=-0.3, high=0, size=1),
120 | self.np_random.uniform(low=-0.2, high=0.2, size=1)])
121 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
122 | break
123 |
124 | qpos[-4:-2] = self.cylinder_pos
125 | qpos[-2:] = self.goal_pos
126 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
127 | high=0.005, size=self.model.nv)
128 | qvel[-4:] = 0
129 | self.set_state(qpos, qvel)
130 | return self._get_obs()
131 |
132 |
133 | class StrikerMovingStartStateEnv(StrikerEnv, utils.EzPickle):
134 | """
135 |     Striker variant that samples new ball and goal positions on every reset.
136 | """
137 | def __init__(self, model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/striker.xml", **kwargs):
138 | self._striked = False
139 | self._min_strike_dist = np.inf
140 | self.strike_threshold = 0.1
141 | mujoco_env.MujocoEnv.__init__(self, model_path, 5)
142 | utils.EzPickle.__init__(self)
143 |
144 | def reset_model(self):
145 | self._min_strike_dist = np.inf
146 | self._striked = False
147 | self._strike_pos = None
148 |
149 | qpos = self.init_qpos
150 |
151 | self.ball = np.concatenate([
152 | self.np_random.uniform(low=0.43, high=0.55, size=1),
153 | self.np_random.uniform(low=-0.05, high=-.3, size=1)])
154 |
155 | while True:
156 | self.goal = np.concatenate([
157 | self.np_random.uniform(low=0.15, high=0.7, size=1),
158 | self.np_random.uniform(low=0.1, high=1.0, size=1)])
159 | if np.linalg.norm(self.ball - self.goal) > 0.17:
160 | break
161 |
162 | qpos[-9:-7] = [self.ball[1], self.ball[0]]
163 | qpos[-7:-5] = self.goal
164 | diff = self.ball - self.goal
165 | angle = -np.arctan(diff[0] / (diff[1] + 1e-8))
166 | qpos[-1] = angle / 3.14
167 | qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1,
168 | size=self.model.nv)
169 | qvel[7:] = 0
170 | self.set_state(qpos, qvel)
171 | return self._get_obs()
172 |
--------------------------------------------------------------------------------
/gym_extensions/continuous/mujoco/modified_half_cheetah.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 | import os.path as osp
5 | from gym_extensions.continuous.mujoco.wall_envs import WallEnvFactory
6 | from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory
7 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
8 | from gym_extensions.continuous.mujoco.perturbed_bodypart_env import ModifiedSizeEnvFactory
9 |
10 | import os
11 | import gym
12 |
13 | HalfCheetahWallEnv = lambda *args, **kwargs : WallEnvFactory(ModifiedHalfCheetahEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/half_cheetah.xml", ori_ind=-1, *args, **kwargs)
14 |
15 | HalfCheetahGravityEnv = lambda *args, **kwargs : GravityEnvFactory(ModifiedHalfCheetahEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/half_cheetah.xml", *args, **kwargs)
16 |
17 | HalfCheetahModifiedBodyPartSizeEnv = lambda *args, **kwargs : ModifiedSizeEnvFactory(ModifiedHalfCheetahEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/half_cheetah.xml", *args, **kwargs)
18 |
19 |
20 | class ModifiedHalfCheetahEnv(HalfCheetahEnv, utils.EzPickle):
21 | """
22 |     Simply allows changing of the XML file; probably not necessary if the xml path gets accepted upstream as a kwarg in OpenAI gym.
23 | """
24 | def __init__(self, **kwargs):
25 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 5)
26 | utils.EzPickle.__init__(self)
27 |
28 | class HalfCheetahWithSensorEnv(HalfCheetahEnv, utils.EzPickle):
29 | """
30 |     Adds empty sensor readouts; this is to be used when transferring to WallEnvs, where the sensor readouts give distances to the wall
31 | """
32 |
33 | def __init__(self, n_bins=10, **kwargs):
34 | self.n_bins = n_bins
35 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 5)
36 | utils.EzPickle.__init__(self)
37 |
38 |
39 | def _get_obs(self):
40 | obs = np.concatenate([
41 | HalfCheetahEnv._get_obs(self),
42 | np.zeros(self.n_bins)
43 | # goal_readings
44 | ])
45 | return obs
46 |
--------------------------------------------------------------------------------
/gym_extensions/continuous/mujoco/modified_hopper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 | import os.path as osp
5 | from gym_extensions.continuous.mujoco.wall_envs import *
6 | from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory
7 | from gym_extensions.continuous.mujoco.perturbed_bodypart_env import ModifiedSizeEnvFactory
8 |
9 | from gym.envs.mujoco.hopper import HopperEnv
10 |
11 | import os
12 | import gym
13 |
14 | HopperSimpleWallEnv = lambda *args, **kwargs : SimpleWallEnvFactory(ModifiedHopperEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/hopper.xml", ori_ind=-1, *args, **kwargs)
15 |
16 | HopperStairs = lambda *args, **kwargs : StairsFactory(ModifiedHopperEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/hopper.xml", ori_ind=-1, *args, **kwargs)
17 |
18 | HopperGravityEnv = lambda *args, **kwargs : GravityEnvFactory(ModifiedHopperEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/hopper.xml", *args, **kwargs)
19 |
20 | HopperModifiedBodyPartSizeEnv = lambda *args, **kwargs : ModifiedSizeEnvFactory(ModifiedHopperEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/hopper.xml", *args, **kwargs)
21 |
22 |
23 | class ModifiedHopperEnv(HopperEnv, utils.EzPickle):
24 | """
25 |     Simply allows changing of the XML file; probably not necessary if the xml path gets accepted upstream as a kwarg in OpenAI gym.
26 | """
27 | def __init__(self, **kwargs):
28 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 4)
29 | utils.EzPickle.__init__(self)
30 |
31 | class HopperWithSensorEnv(HopperEnv, utils.EzPickle):
32 | """
33 |     Adds empty sensor readouts; this is to be used when transferring to WallEnvs, where the sensor readouts give distances to the wall
34 | """
35 |
36 | def __init__(self, n_bins=10, **kwargs):
37 | self.n_bins = n_bins
38 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 4)
39 | utils.EzPickle.__init__(self)
40 |
41 |
42 | def _get_obs(self):
43 | obs = np.concatenate([
44 | HopperEnv._get_obs(self),
45 | np.zeros(self.n_bins)
46 | # goal_readings
47 | ])
48 | return obs
49 |
--------------------------------------------------------------------------------
/gym_extensions/continuous/mujoco/modified_humanoid.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 | import os.path as osp
5 | from gym_extensions.continuous.mujoco.wall_envs import WallEnvFactory
6 | from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory
7 | from gym.envs.mujoco.humanoid import HumanoidEnv, mass_center
8 | from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv
9 | from gym_extensions.continuous.mujoco.perturbed_bodypart_env import ModifiedSizeEnvFactory
10 |
11 | import os
12 | import gym
13 |
14 | HumanoidWallEnv = lambda *args, **kwargs : WallEnvFactory(ModifiedHumanoidEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoid.xml", ori_ind=-1, *args, **kwargs)
15 |
16 | HumanoidStandupAndRunWallEnv = lambda *args, **kwargs : WallEnvFactory(HumanoidStandupAndRunEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoidstandup.xml", ori_ind=-1, *args, **kwargs)
17 |
18 | HumanoidGravityEnv = lambda *args, **kwargs : GravityEnvFactory(ModifiedHumanoidEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoid.xml", *args, **kwargs)
19 |
20 | HumanoidModifiedBodyPartSizeEnv = lambda *args, **kwargs : ModifiedSizeEnvFactory(ModifiedHumanoidEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoid.xml", *args, **kwargs)
21 |
22 |
23 | class ModifiedHumanoidEnv(HumanoidEnv, utils.EzPickle):
24 | """
25 |     Simply allows changing of the XML file; probably not necessary if the xml path gets accepted upstream as a kwarg in OpenAI gym.
26 | """
27 | def __init__(self, **kwargs):
28 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 5)
29 | utils.EzPickle.__init__(self)
30 |
31 | class HumanoidWithSensorEnv(HumanoidEnv, utils.EzPickle):
32 | """
33 |     Adds empty sensor readouts; this is to be used when transferring to WallEnvs, where the sensor readouts give distances to the wall
34 | """
35 |
36 | def __init__(self, n_bins=10, **kwargs):
37 | self.n_bins = n_bins
38 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 5)
39 | utils.EzPickle.__init__(self)
40 |
41 |
42 | def _get_obs(self):
43 | obs = np.concatenate([
44 | HumanoidEnv._get_obs(self),
45 | np.zeros(self.n_bins)
46 | # goal_readings
47 | ])
48 | return obs
49 |
50 |
51 | class HumanoidStandupWithSensorEnv(HumanoidStandupEnv, utils.EzPickle):
52 | """
53 |     Adds empty sensor readouts; this is to be used when transferring to WallEnvs, where the sensor readouts give distances to the wall
54 | """
55 |
56 | def __init__(self, n_bins=10, **kwargs):
57 | self.n_bins = n_bins
58 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 5)
59 | utils.EzPickle.__init__(self)
60 |
61 |
62 | def _get_obs(self):
63 | obs = np.concatenate([
64 | HumanoidStandupEnv._get_obs(self),
65 | np.zeros(self.n_bins)
66 | # goal_readings
67 | ])
68 | return obs
69 |
70 | class HumanoidStandupAndRunEnv(HumanoidEnv, utils.EzPickle):
71 | """
72 |     Environment that rewards standing up and then running; this is the combination of Humanoid-v1 and HumanoidStandup-v1
73 | """
74 | def __init__(self, model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoidstandup.xml", **kwargs):
75 | mujoco_env.MujocoEnv.__init__(self, model_path, 5)
76 | utils.EzPickle.__init__(self)
77 |
78 | def _step(self, a):
79 | pos_before = mass_center(self.model)
80 | self.do_simulation(a, self.frame_skip)
81 | pos_after = mass_center(self.model)
82 |
83 | pos_after_standup = self.model.data.qpos[2][0]
84 |
85 | down = bool(( self.model.data.qpos[2] < 1.0) or ( self.model.data.qpos[2] > 2.0))
86 |
87 | alive_bonus = 5.0 if not down else 1.0
88 |
89 | data = self.model.data
90 |
91 | uph_cost = (pos_after_standup - 0) / self.model.opt.timestep
92 | lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
93 |
94 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
95 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
96 | quad_impact_cost = min(quad_impact_cost, 10)
97 |
98 | reward = lin_vel_cost + uph_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
99 | qpos = self.model.data.qpos
100 |
101 | done = bool(False)
102 | return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost, reward_alive=alive_bonus)
103 |
104 |
105 | class HumanoidStandupAndRunEnvWithSensor(HumanoidStandupAndRunEnv, utils.EzPickle):
106 | """
107 |     Environment that rewards standing up and then running (the combination of Humanoid-v1 and HumanoidStandup-v1); also adds empty sensor
108 |     readouts to the state space to make it compatible with transfer learning for the WallEnv variant
109 | """
110 | def __init__(self, model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoidstandup.xml", n_bins=10, **kwargs):
111 | self.n_bins = n_bins
112 | mujoco_env.MujocoEnv.__init__(self, model_path, 5)
113 | utils.EzPickle.__init__(self)
114 |
115 | def _get_obs(self):
116 | obs = np.concatenate([
117 | HumanoidStandupAndRunEnv._get_obs(self),
118 | np.zeros(self.n_bins)
119 | # goal_readings
120 | ])
121 | return obs
122 |
--------------------------------------------------------------------------------
/gym_extensions/continuous/mujoco/modified_walker2d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 | import os.path as osp
5 | from gym_extensions.continuous.mujoco.wall_envs import WallEnvFactory
6 | from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory
7 | from gym.envs.mujoco.walker2d import Walker2dEnv
8 | from gym_extensions.continuous.mujoco.perturbed_bodypart_env import ModifiedSizeEnvFactory
9 |
10 | import os
11 | import gym
12 |
13 | Walker2dWallEnv = lambda *args, **kwargs : WallEnvFactory(ModifiedWalker2dEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/walker2d.xml", ori_ind=-1, *args, **kwargs)
14 |
15 | Walker2dGravityEnv = lambda *args, **kwargs : GravityEnvFactory(ModifiedWalker2dEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/walker2d.xml", *args, **kwargs)
16 |
17 | Walker2dModifiedBodyPartSizeEnv = lambda *args, **kwargs : ModifiedSizeEnvFactory(ModifiedWalker2dEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/walker2d.xml", *args, **kwargs)
18 |
19 |
20 | class ModifiedWalker2dEnv(Walker2dEnv, utils.EzPickle):
21 | """
22 |     Simply allows changing of the XML file; probably not necessary if the xml path gets accepted upstream as a kwarg in OpenAI gym.
23 | """
24 | def __init__(self, **kwargs):
25 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 4)
26 | utils.EzPickle.__init__(self)
27 |
28 | class Walker2dWithSensorEnv(Walker2dEnv, utils.EzPickle):
29 | """
30 |     Adds empty sensor readouts; this is to be used when transferring to WallEnvs, where the sensor readouts give distances to the wall
31 | """
32 |
33 | def __init__(self, n_bins=10, **kwargs):
34 | self.n_bins = n_bins
35 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 4)
36 | utils.EzPickle.__init__(self)
37 |
38 |
39 | def _get_obs(self):
40 | obs = np.concatenate([
41 | Walker2dEnv._get_obs(self),
42 | np.zeros(self.n_bins)
43 | # goal_readings
44 | ])
45 | return obs
46 |
--------------------------------------------------------------------------------
/gym_extensions/continuous/mujoco/perturbed_bodypart_env.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import tempfile
3 | import xml.etree.ElementTree as ET
4 | import math
5 |
6 | import numpy as np
7 | import gym
8 | import random
9 | import os
10 | from gym import utils
11 | from gym.envs.mujoco import mujoco_env
12 | import mujoco_py
13 | import six
14 |
15 | def ModifiedMassEnvFactory(class_type):
16 | """class_type should be an OpenAI gym type"""
17 |
18 |
19 | class ModifiedMassEnv(class_type, utils.EzPickle):
20 | """
21 |         Allows the mass of the given body part to be scaled via the mass_scale kwarg.
22 | """
23 | def __init__(
24 | self,
25 | model_path,
26 | body_part="torso",
27 | mass_scale=1.0,
28 | *args,
29 | **kwargs):
30 | class_type.__init__(self, model_path=model_path)
31 | utils.EzPickle.__init__(self)
32 |
33 | # make sure we're using a proper OpenAI gym Mujoco Env
34 | assert isinstance(self, mujoco_env.MujocoEnv)
35 |
36 | self.model.body_mass = self.get_and_modify_bodymass(body_part, mass_scale)
37 | self.model._compute_subtree()
38 | self.model.forward()
39 |
40 | def get_and_modify_bodymass(self, body_name, scale):
41 | idx = self.model.body_names.index(six.b(body_name))
42 | temp = np.copy(self.model.body_mass)
43 | temp[idx] *= scale
44 | return temp
45 |
46 |
47 |
48 | return ModifiedMassEnv
49 |
50 | def ModifiedSizeEnvFactory(class_type):
51 | """class_type should be an OpenAI gym type"""
52 |
53 |
54 |
55 | class ModifiedSizeEnv(class_type, utils.EzPickle):
56 | """
57 |         Allows the size of the given body-part geoms to be rescaled via the size_scale kwarg.
58 | """
59 |
60 |
61 | def __init__(
62 | self,
63 | model_path,
64 | body_parts=["torso"],
65 | size_scale=1.0,
66 | *args,
67 | **kwargs):
68 |
69 | assert isinstance(self, mujoco_env.MujocoEnv)
70 |
71 | # find the body_part we want
72 | tree = ET.parse(model_path)
73 | for body_part in body_parts:
74 | # torso = tree.find(".//body[@name='%s']" % body_part)
75 |
76 | # grab the geoms
77 | geom = tree.find(".//geom[@name='%s']" % body_part)
78 |
79 | sizes = [float(x) for x in geom.attrib["size"].split(" ")]
80 |
81 | # rescale
82 | geom.attrib["size"] = " ".join([str(x * size_scale) for x in sizes ]) # the first one should always be the thing we want.
83 |
84 | # TODO: in the future we want to also be able to make it longer or shorter, but this requires propagation of the fromto attribute
85 | # so like a middle part isn't super long but the other parts connect at the same spot.
86 |
87 | # fromto = []
88 | # for x in geoms[0].attrib["fromto"].split(" "):
89 | # fromto.append(float(x))
90 | # fromto = [x*length_scale for x in fromto]
91 | # geoms[0].attrib["fromto"] = str() * length_scale) # the first one should always be the thing we want.
92 |
93 | # create new xml
94 | _, file_path = tempfile.mkstemp(text=True)
95 | tree.write(file_path)
96 |
97 | # load the modified xml
98 | class_type.__init__(self, model_path=file_path)
99 | utils.EzPickle.__init__(self)
100 |
101 |
102 | # def get_and_modify_bodysize(self, body_name, scale):
103 | # idx = self.model.body_names.index(six.b(body_name))
104 | # temp = np.copy(self.model.geom_size)
105 | # temp[idx] *= scale
106 | # return temp
107 |
108 | return ModifiedSizeEnv
109 |
--------------------------------------------------------------------------------
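A sketch of building a size-perturbed env directly from ModifiedSizeEnvFactory, analogous to the HopperBigFoot-v0 registration (the 1.1 scale factor is illustrative, not one of the registered variants):

import os
import gym.envs.mujoco
from gym_extensions.continuous.mujoco.perturbed_bodypart_env import ModifiedSizeEnvFactory
from gym_extensions.continuous.mujoco.modified_hopper import ModifiedHopperEnv

hopper_xml = os.path.dirname(gym.envs.mujoco.__file__) + "/assets/hopper.xml"
BigFootHopper = ModifiedSizeEnvFactory(ModifiedHopperEnv)
# rewrites the geom sizes in a temporary copy of hopper.xml, then loads it
env = BigFootHopper(model_path=hopper_xml, body_parts=["foot_geom"], size_scale=1.1)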
/gym_extensions/discrete/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/gym_extensions/discrete/__init__.py
--------------------------------------------------------------------------------
/gym_extensions/discrete/classic/__init__.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import os
3 |
4 | custom_envs = {
5 | # Contextual environments
6 | "CartPoleContextual-v0":
7 | dict(path='gym_extensions.discrete.classic.cartpole_contextual:CartPoleContextualEnv',
8 | max_episode_steps=500,
9 | kwargs= dict()),
10 | "PendulumContextual-v0":
11 | dict(path='gym_extensions.discrete.classic.pendulum_contextual:PendulumContextualEnv',
12 | max_episode_steps=500,
13 | kwargs= dict())
14 | }
15 |
16 | def register_custom_envs():
17 | for key, value in custom_envs.items():
18 | arg_dict = dict(id=key,
19 | entry_point=value["path"],
20 | max_episode_steps=value["max_episode_steps"],
21 | kwargs=value["kwargs"])
22 |
23 | if "reward_threshold" in value:
24 | arg_dict["reward_threshold"] = value["reward_threshold"]
25 |
26 | gym.envs.register(**arg_dict)
27 |
28 | register_custom_envs()
29 |
--------------------------------------------------------------------------------
/gym_extensions/discrete/classic/cartpole.py:
--------------------------------------------------------------------------------
1 | """
2 | Classic cart-pole system implemented by Rich Sutton et al.
3 | Copied from https://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c
4 | """
5 |
6 | import logging
7 | import math
8 | import gym
9 | from gym import spaces
10 | from gym.utils import seeding
11 | import numpy as np
12 |
13 | logger = logging.getLogger(__name__)
14 |
15 | def register_custom_cartpole(tag, gravity=9.8, masscart=1.0, masspole=0.1, pole_length=.5, force_mag=10.0):
16 | """
17 | Tag - What to call your env (e.g. CustomCartpoleLongPole-v0, CustomCartpoleLongPole-v1)
18 | gravity - if you want to modify the gravity factor (default 9.8)
19 | masscart - the mass of the cartpole base
20 | masspole - the mass of the pole
21 |     pole_length - the length of the pole
22 | force_mag - the magnitude of the exerted action force
23 | """
24 | return gym.envs.register(
25 | id=tag,
26 |         entry_point="gym_extensions.discrete.classic.cartpole:CustomizableCartPoleEnv",
27 | max_episode_steps=200,
28 | reward_threshold=195.0,
29 | kwargs= dict(gravity = gravity, masscart = masscart, masspole = masspole, length = pole_length, force_mag = force_mag)
30 | )
31 |
32 |
33 | class CustomizableCartPoleEnv(gym.Env):
34 | metadata = {
35 | 'render.modes': ['human', 'rgb_array'],
36 | 'video.frames_per_second' : 50
37 | }
38 |
39 | def __init__(self, gravity=9.8, masscart=1.0, masspole=0.1, length = .5, force_mag = 10.0):
40 | self.gravity = gravity
41 | self.masscart = masscart
42 | self.masspole = masspole
43 | self.total_mass = (self.masspole + self.masscart)
44 | self.length = length # actually half the pole's length
45 | self.polemass_length = (self.masspole * self.length)
46 | self.force_mag = force_mag
47 | self.tau = 0.02 # seconds between state updates
48 |
49 | # Angle at which to fail the episode
50 | self.theta_threshold_radians = 12 * 2 * math.pi / 360
51 | self.x_threshold = 2.4
52 |
53 | # Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
54 | high = np.array([
55 | self.x_threshold * 2,
56 | np.finfo(np.float32).max,
57 | self.theta_threshold_radians * 2,
58 | np.finfo(np.float32).max])
59 |
60 | self.action_space = spaces.Discrete(2)
61 | self.observation_space = spaces.Box(-high, high)
62 |
63 | self._seed()
64 | self.viewer = None
65 | self.state = None
66 |
67 | self.steps_beyond_done = None
68 |
69 | def _seed(self, seed=None):
70 | self.np_random, seed = seeding.np_random(seed)
71 | return [seed]
72 |
73 | def _step(self, action):
74 | assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
75 | state = self.state
76 | x, x_dot, theta, theta_dot = state
77 | force = self.force_mag if action==1 else -self.force_mag
78 | costheta = math.cos(theta)
79 | sintheta = math.sin(theta)
80 | temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
81 | thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
82 | xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
83 | x = x + self.tau * x_dot
84 | x_dot = x_dot + self.tau * xacc
85 | theta = theta + self.tau * theta_dot
86 | theta_dot = theta_dot + self.tau * thetaacc
87 | self.state = (x,x_dot,theta,theta_dot)
88 | done = x < -self.x_threshold \
89 | or x > self.x_threshold \
90 | or theta < -self.theta_threshold_radians \
91 | or theta > self.theta_threshold_radians
92 | done = bool(done)
93 |
94 | if not done:
95 | reward = 1.0
96 | elif self.steps_beyond_done is None:
97 | # Pole just fell!
98 | self.steps_beyond_done = 0
99 | reward = 1.0
100 | else:
101 | if self.steps_beyond_done == 0:
102 | logger.warning("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
103 | self.steps_beyond_done += 1
104 | reward = 0.0
105 |
106 | return np.array(self.state), reward, done, {}
107 |
108 | def _reset(self):
109 | self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
110 | self.steps_beyond_done = None
111 | return np.array(self.state)
112 |
113 | def _render(self, mode='human', close=False):
114 | if close:
115 | if self.viewer is not None:
116 | self.viewer.close()
117 | self.viewer = None
118 | return
119 |
120 | screen_width = 600
121 | screen_height = 400
122 |
123 | world_width = self.x_threshold*2
124 | scale = screen_width/world_width
125 | carty = 100 # TOP OF CART
126 | polewidth = 10.0
127 | polelen = scale * 1.0
128 | cartwidth = 50.0
129 | cartheight = 30.0
130 |
131 | if self.viewer is None:
132 | from gym.envs.classic_control import rendering
133 | self.viewer = rendering.Viewer(screen_width, screen_height)
134 | l,r,t,b = -cartwidth/2, cartwidth/2, cartheight/2, -cartheight/2
135 | axleoffset =cartheight/4.0
136 | cart = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
137 | self.carttrans = rendering.Transform()
138 | cart.add_attr(self.carttrans)
139 | self.viewer.add_geom(cart)
140 | l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2
141 | pole = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
142 | pole.set_color(.8,.6,.4)
143 | self.poletrans = rendering.Transform(translation=(0, axleoffset))
144 | pole.add_attr(self.poletrans)
145 | pole.add_attr(self.carttrans)
146 | self.viewer.add_geom(pole)
147 | self.axle = rendering.make_circle(polewidth/2)
148 | self.axle.add_attr(self.poletrans)
149 | self.axle.add_attr(self.carttrans)
150 | self.axle.set_color(.5,.5,.8)
151 | self.viewer.add_geom(self.axle)
152 | self.track = rendering.Line((0,carty), (screen_width,carty))
153 | self.track.set_color(0,0,0)
154 | self.viewer.add_geom(self.track)
155 |
156 | if self.state is None: return None
157 |
158 | x = self.state
159 | cartx = x[0]*scale+screen_width/2.0 # MIDDLE OF CART
160 | self.carttrans.set_translation(cartx, carty)
161 | self.poletrans.set_rotation(-x[2])
162 |
163 | return self.viewer.render(return_rgb_array = mode=='rgb_array')
164 |
--------------------------------------------------------------------------------
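A hedged sketch of registering and instantiating one of these customizable cartpoles; the tag below is the example name from the docstring, not an ID shipped with the package:

import gym
from gym_extensions.discrete.classic.cartpole import register_custom_cartpole

register_custom_cartpole("CustomCartpoleLongPole-v0", pole_length=1.0)
env = gym.make("CustomCartpoleLongPole-v0")
obs = env.reset()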
/gym_extensions/discrete/classic/cartpole_contextual.py:
--------------------------------------------------------------------------------
1 | """
2 | Classic cart-pole system implemented by Rich Sutton et al.
3 | Copied from https://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c
4 | """
5 |
6 | import logging
7 | import math
8 | import gym
9 | from gym import spaces
10 | from gym.envs.classic_control.cartpole import CartPoleEnv
11 | from gym.utils import seeding
12 | import numpy as np
13 |
14 | logger = logging.getLogger(__name__)
15 |
16 | def register_custom_cartpole(tag, gravity=9.8, masscart=1.0, masspole=0.1, pole_length=.5, force_mag=10.0):
17 | """
18 | Tag - What to call your env (e.g. CustomCartpoleLongPole-v0, CustomCartpoleLongPole-v1)
19 | gravity - if you want to modify the gravity factor (default 9.8)
20 | masscart - the mass of the cartpole base
21 | masspole - the mass of the pole
22 |     pole_length - the length of the pole
23 | force_mag - the magnitude of the exerted action force
24 | """
25 | return gym.envs.register(
26 | id=tag,
27 |         entry_point="gym_extensions.discrete.classic.cartpole_contextual:CartPoleContextualEnv",
28 | max_episode_steps=200,
29 | reward_threshold=195.0,
30 | kwargs= dict(gravity = gravity, masscart = masscart, masspole = masspole, length = pole_length, force_mag = force_mag)
31 | )
32 |
33 | class CartPoleContextualEnv(CartPoleEnv):
34 | metadata = {
35 | 'render.modes': ['human', 'rgb_array'],
36 | 'video.frames_per_second' : 50
37 | }
38 |
39 | def __init__(self,gravity=9.8, masscart=1.0, masspole=0.1, length = .5, force_mag = 10.0):
40 | super(CartPoleContextualEnv, self).__init__()
41 | self.context = [masscart, masspole, length, force_mag]
42 | self.gravity = gravity # not including in the context for now
43 | #self.masscart = self.context[0]
44 | self.masspole = self.context[1]
45 | self.length = self.context[2]
46 | self.force_mag = self.context[3]
47 |
48 |         # it is our own responsibility to define the valid range of the context, since we introduce it here
49 | self.context_high = np.array([
50 | self.masscart * 2,
51 | self.masspole * 10,
52 | self.length * 2,
53 | self.force_mag * 2])
54 |
55 |         # the params in the given context can't be less than or equal to zero!
56 | self.context_low = np.array([ 0.1, 0.1, 0.1, 0.1])
57 |
58 |
59 | def _step(self, action):
60 | state, reward, done, _ = super(CartPoleContextualEnv, self)._step(action)
61 | return state, reward, done, {'masscart':self.masscart, 'masspole':self.masspole, 'pole_length':self.length, 'force_magnitude':self.force_mag}
62 |
63 |
64 | def change_context(self, context_vector):
65 | self.masscart = context_vector
66 |
67 |
68 | def context_space_info(self):
69 | context_info_dict = {}
70 |
71 | context_info_dict['context_vals'] = [1.0, 0.1, 0.5, 10.0]
72 | context_info_dict['context_high'] = self.context_high.tolist() # to make sure it can be serialized in json files
73 | context_info_dict['context_low' ] = self.context_low.tolist()
74 | context_info_dict['state_dims' ] = 4
75 | context_info_dict['action_dims' ] = 1
76 | context_info_dict['action_space'] = 'discrete'
77 | context_info_dict['state_high' ] = self.observation_space.high.tolist()
78 | context_info_dict['state_low' ] = self.observation_space.low.tolist()
79 | context_info_dict['action_high' ] = 1
80 | context_info_dict['action_low' ] = 0
81 |
82 | return context_info_dict
83 |
84 |
85 |
86 |
87 |
88 |
89 |
--------------------------------------------------------------------------------
/gym_extensions/discrete/classic/pendulum_contextual.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import math
3 | import gym
4 | from gym import spaces
5 | from gym.envs.classic_control.pendulum import PendulumEnv
6 | from gym.utils import seeding
7 | import numpy as np
8 | #from os import path
9 | logger = logging.getLogger(__name__)
10 |
11 | def register_custom_cartpole(tag, max_speed = 8, max_torque = 2.0):
12 | """
13 |     tag - what to call your env (e.g. CustomPendulum-v0)
14 |     max_speed - the maximum angular speed of the pendulum (default 8)
15 |     max_torque - the maximum torque an action can apply (default 2.0)
16 |
17 |     Note: despite its name, this helper registers the contextual Pendulum
18 |     environment, not a cartpole.
19 | """
20 | return gym.envs.register(
21 | id=tag,
22 | entry_point="envs.transfer.classic.pendulum_contextual:PendulumContextualEnv",
23 | max_episode_steps=200,
24 | reward_threshold=195.0,
25 | kwargs= dict(max_speed = max_speed, max_torque = max_torque)
26 | )
27 |
28 | class PendulumContextualEnv(PendulumEnv):
29 | def __init__(self, max_speed=8, max_torque=2.0):
30 | super(PendulumContextualEnv, self).__init__()
31 | self.context = [max_speed, max_torque]
32 | self.max_speed = self.context[0]
33 | self.max_torque = self.context[1]
34 |
35 | # it is our responsibility to define the valid range of the context, since we define the context ourselves
36 | self.context_high = np.array([
37 | self.max_speed * 10,
38 | self.max_torque * 10])
39 |
40 | self.context_low = np.array([0.1, 0.1]) # the params in the context can't be less than or equal to zero!
41 |
42 |
43 | def _step(self, action):
44 | state, reward, done, _ = super(PendulumContextualEnv, self)._step(action)
45 | return state, reward, done, {'max_speed':self.max_speed, 'max_torque':self.max_torque}
46 |
47 |
48 | def change_context(self, context_vector):
49 | self.context = context_vector
50 | self.max_speed, self.max_torque = context_vector
51 |
52 | def context_space_info(self):
53 | context_info_dict = {}
54 |
55 | context_info_dict['context_vals'] = [8.0, 2.0]
56 | context_info_dict['context_high'] = self.context_high.tolist()
57 | context_info_dict['context_low' ] = self.context_low.tolist()
58 | context_info_dict['state_dims' ] = 3
59 | # the size of the action vector that needs to be passed to the transition function
60 | context_info_dict['action_dims' ] = 1
61 | context_info_dict['action_space'] = 'continuous'
62 | context_info_dict['state_high' ] = self.observation_space.high.tolist()
63 | context_info_dict['state_low' ] = self.observation_space.low.tolist()
64 | context_info_dict['action_high' ] = self.action_space.high.tolist()
65 | context_info_dict['action_low' ] = self.action_space.low.tolist()
66 |
67 | return context_info_dict
68 |
--------------------------------------------------------------------------------
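A short sketch of exercising the contextual interface. It assumes `gym_extensions.discrete.classic` registers `PendulumContextual-v0`, as the tests further below do; the context values are arbitrary examples:

    import gym
    from gym_extensions.discrete import classic  # importing registers the contextual envs

    env = gym.make("PendulumContextual-v0")
    print(env.unwrapped.context_space_info()["context_high"])

    # swap in a new (max_speed, max_torque) context before the next episode
    env.unwrapped.change_context([4.0, 1.0])
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    print(info)  # {'max_speed': 4.0, 'max_torque': 1.0}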
/gym_extensions/tests/README.md:
--------------------------------------------------------------------------------
1 | # Testing
2 |
3 | This directory contains both unit tests and manual testing scripts. Ideally everything would be unit or integration tested, but since this is research code there are some manual test scripts kept for simplicity and speed. If you see something missing, feel free to open an issue or a pull request adding more formal testing; it would be much appreciated! (And you'd be added to the list of contributors.)
4 |
--------------------------------------------------------------------------------
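As a rough guide (not an official runner), the unit-test style files can be collected with nose, which is already listed in requirements.txt, e.g. `nosetests gym_extensions/tests/test_contextual_envs.py` from the repository root; the manual scripts such as `gym_extensions/tests/test_wrappers.py` are meant to be run directly with `python` and checked by eye.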
/gym_extensions/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/gym_extensions/tests/__init__.py
--------------------------------------------------------------------------------
/gym_extensions/tests/test_contextual_envs.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym.envs.classic_control.cartpole import CartPoleEnv
3 | from gym.envs.classic_control.pendulum import PendulumEnv
4 | from gym_extensions.discrete import classic
5 |
6 | def test_cartpole_contextual():
7 | env_id = 'CartPoleContextual-v0'
8 | env = gym.make(env_id)
9 | if isinstance(env.unwrapped, CartPoleEnv):
10 | env.reset()
11 | else:
12 | raise NotImplementedError
13 |
14 | nr_of_items_context_space_info = 10
15 | nr_unwrapped = len(list(env.unwrapped.context_space_info().keys()))
16 | if nr_of_items_context_space_info != nr_unwrapped:
17 | print('context_space_info() function needs to be implemented!')
18 | raise NotImplementedError
19 |
20 | context_vect = [0.01, 0.01, 0.01, 0.01]
21 | # the context should only match context_vect after change_context() is called below
22 | if context_vect == env.unwrapped.context:
23 | raise AttributeError
24 |
25 | env.unwrapped.change_context(context_vect)
26 | if context_vect != env.unwrapped.context:
27 | raise AttributeError
28 |
29 |
30 | def test_pendulum_contextual():
31 | env_id = 'PendulumContextual-v0'
32 | env = gym.make(env_id)
33 | if isinstance(env.unwrapped, PendulumEnv):
34 | env.reset()
35 | else:
36 | raise NotImplementedError
37 |
38 | nr_of_items_context_space_info = 10
39 | nr_unwrapped = len(list(env.unwrapped.context_space_info().keys()))
40 | if nr_of_items_context_space_info != nr_unwrapped:
41 | print('context_space_info() function needs to be implemented!')
42 | raise NotImplementedError
43 |
44 | context_vect = [0.01, 0.01]
45 | if context_vect == env.unwrapped.context:
46 | raise AttributeError
47 |
48 | env.unwrapped.change_context(context_vect)
49 | if context_vect != env.unwrapped.context:
50 | raise AttributeError
51 |
52 |
53 | if __name__ == "__main__":
54 | test_cartpole_contextual()
55 | test_pendulum_contextual()
56 |
--------------------------------------------------------------------------------
/gym_extensions/tests/test_import.py:
--------------------------------------------------------------------------------
1 | import gym
2 |
3 | def test_import_classic():
4 | """
5 | Test import of official envs as seen in the README
6 | """
7 | from gym_extensions.discrete import classic
8 | envs = [x for x in classic.custom_envs.keys()]
9 | [gym.make(x) for x in envs]
10 |
11 | def test_import_box2d():
12 | """
13 | Test import of official envs as seen in the README
14 | """
15 | from gym_extensions.continuous import box2d
16 | envs = [x for x in box2d.custom_envs.keys()]
17 | [gym.make(x) for x in envs]
18 |
19 | def test_import_gym_navigation_2d():
20 | """
21 | Test import of official envs as seen in the README
22 | """
23 | from gym_extensions.continuous import gym_navigation_2d
24 | envs = [x for x in gym_navigation_2d.custom_envs.keys()]
25 | [gym.make(x) for x in envs]
26 |
27 | if __name__ == '__main__':
28 | #print(test_import_classic())
29 | print(test_import_box2d())
30 | #print(test_import_gym_navigation_2d())
31 |
--------------------------------------------------------------------------------
/gym_extensions/tests/test_mujoco_import.py:
--------------------------------------------------------------------------------
1 | import gym
2 |
3 | def test_import_mujoco():
4 | """
5 | Test import of official envs as seen in the README
6 | """
7 | from gym_extensions.continuous import mujoco
8 | envs = [x for x in mujoco.custom_envs.keys()]
9 | [gym.make(x) for x in envs]
10 | return True
11 |
12 | if __name__ == '__main__':
13 | print(test_import_mujoco())
14 |
--------------------------------------------------------------------------------
/gym_extensions/tests/test_wrappers.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym_extensions.wrappers.observation_transform_wrapper import ObservationTransformWrapper
3 | from gym_extensions.wrappers.transformers import (SimpleNormalizePixelIntensitiesTransformer,
4 | ResizeImageTransformer, AppendPrevTimeStepTransformer)
5 | import time
6 |
7 | def test_image_resize():
8 | gymenv = gym.make("Breakout-v4")
9 | transformers = [SimpleNormalizePixelIntensitiesTransformer(), ResizeImageTransformer(fraction_of_current_size=.35)]
10 | transformed_env = ObservationTransformWrapper(gymenv, transformers)
11 | return transformed_env
12 |
13 | def test_append_prev_timestep():
14 | gymenv = gym.make("Hopper-v1")
15 | transformers = [AppendPrevTimeStepTransformer()]
16 | transformed_env = ObservationTransformWrapper(gymenv, transformers)
17 | return transformed_env
18 |
19 | for env in [test_image_resize(), test_append_prev_timestep()]:
20 | env.reset()
21 | print(env.observation_space)
22 | for t in range(100):
23 | env.render()
24 |
25 | start = time.time()
26 | action = env.action_space.sample()
27 | end = time.time()
28 |
29 |
30 | start = time.time()
31 | observation, reward, done, info = env.step(action)
32 | print(observation)
33 | print(observation.shape)
34 | end = time.time()
35 |
36 | if done:
37 | print("Episode finished after {} timesteps".format(t+1))
38 | break
39 |
--------------------------------------------------------------------------------
/gym_extensions/wrappers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/gym_extensions/wrappers/__init__.py
--------------------------------------------------------------------------------
/gym_extensions/wrappers/normalized_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym import spaces
4 | from .proxy_env import ProxyEnv
5 | from .serializable import Serializable
6 | from gym.spaces.box import Box
7 |
8 |
9 | class NormalizedEnv(ProxyEnv, Serializable):
10 | def __init__(
11 | self,
12 | env,
13 | normalize_obs=False,
14 | obs_alpha=0.001,
15 | ):
16 | Serializable.quick_init(self, locals())
17 | ProxyEnv.__init__(self, env)
18 | self._normalize_obs = normalize_obs
19 | self._obs_alpha = obs_alpha
20 | self._obs_mean = np.zeros(np.prod(env.observation_space.shape))
21 | self._obs_var = np.ones(np.prod(env.observation_space.shape))
22 |
23 |
24 | def _update_obs_estimate(self, obs):
25 | flat_obs = self._wrapped_env.observation_space.flatten(obs)
26 | self._obs_mean = (1 - self._obs_alpha) * self._obs_mean + self._obs_alpha * flat_obs
27 | self._obs_var = (1 - self._obs_alpha) * self._obs_var + self._obs_alpha * np.square(flat_obs - self._obs_mean)
28 |
29 | def _apply_normalize_obs(self, obs):
30 | self._update_obs_estimate(obs)
31 | return (obs - self._obs_mean) / (np.sqrt(self._obs_var) + 1e-8)
32 |
33 | def reset(self):
34 | ret = self._wrapped_env.reset()
35 | if self._normalize_obs:
36 | return self._apply_normalize_obs(ret)
37 | else:
38 | return ret
39 |
40 | @property
41 | def action_space(self):
42 | if isinstance(self._wrapped_env.action_space, Box):
43 | ub = np.ones(self._wrapped_env.action_space.shape)
44 | return spaces.Box(-1 * ub, ub)
45 | return self._wrapped_env.action_space
46 |
47 | def step(self, action):
48 | if isinstance(self._wrapped_env.action_space, Box):
49 | # rescale the action
50 | lb, ub = self._wrapped_env.action_space.low, self._wrapped_env.action_space.high
51 | scaled_action = lb + (action + 1.) * 0.5 * (ub - lb)
52 | scaled_action = np.clip(scaled_action, lb, ub)
53 | else:
54 | scaled_action = action
55 | wrapped_step = self._wrapped_env.step(scaled_action)
56 | next_obs, reward, done, info = wrapped_step
57 | if self._normalize_obs:
58 | next_obs = self._apply_normalize_obs(next_obs)
59 | return next_obs, reward, done, info
60 |
61 | def __str__(self):
62 | return "Normalized: %s" % self._wrapped_env
63 |
64 | # def log_diagnostics(self, paths):
65 | # print "Obs mean:", self._obs_mean
66 | # print "Obs std:", np.sqrt(self._obs_var)
67 | # print "Reward mean:", self._reward_mean
68 | # print "Reward std:", np.sqrt(self._reward_var)
69 |
70 | normalize = NormalizedEnv
71 |
--------------------------------------------------------------------------------
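A sketch of how NormalizedEnv above might wrap a continuous-control env (the environment id is just an example). The point to note is that for Box action spaces the agent acts in [-1, 1] and step() rescales into the wrapped env's own [low, high] before clipping:

    import gym
    from gym_extensions.wrappers.normalized_env import normalize

    env = normalize(gym.make("Pendulum-v0"))
    obs = env.reset()
    # the exposed action space is Box(-1, 1); step() maps samples back onto the true torque range
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)

Passing normalize_obs=True additionally maintains an exponential running mean/variance of observations, but it relies on an rllab-style observation_space.flatten(), so it may not work with plain gym spaces.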
/gym_extensions/wrappers/observation_transform_wrapper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import Env, spaces
3 |
4 | from cached_property import cached_property
5 |
6 | from .proxy_env import ProxyEnv
7 | from .serializable import Serializable
8 |
9 | BIG = 1e6
10 |
11 | class ObservationTransformWrapper(ProxyEnv, Serializable):
12 | ''' Runs observation through a set of transforms and adjusts the observation space accordingly.'''
13 |
14 | def __init__(self, env, transformers):
15 | '''
16 | :param env: the environment
17 | :param transformers: a list of transformers
18 | '''
19 | Serializable.quick_init(self, locals())
20 | self.transformers = transformers
21 | super(ObservationTransformWrapper, self).__init__(env)
22 |
23 | @cached_property
24 | def observation_space(self):
25 | obs_space = self._wrapped_env.observation_space
26 | print("Original observation space : {}".format(obs_space))
27 | for transformer in self.transformers:
28 | obs_space = transformer.transformed_observation_space(obs_space)
29 | print("Transformed observation space : {}".format(obs_space))
30 | return obs_space
31 |
32 | def reset(self):
33 | obs = self._wrapped_env.reset()
34 | for transformer in self.transformers:
35 | obs = transformer.transform(obs)
36 | transformer.reset()
37 | return obs
38 |
39 | def step(self, action):
40 | next_obs, reward, done, info = self._wrapped_env.step(action)
41 | for transformer in self.transformers:
42 | next_obs = transformer.transform(next_obs)
43 | return (next_obs, reward, done, info)
44 |
--------------------------------------------------------------------------------
/gym_extensions/wrappers/proxy_env.py:
--------------------------------------------------------------------------------
1 | from gym import Env
2 |
3 | from .serializable import Serializable
4 |
5 | class ProxyEnv(Env, Serializable):
6 | """
7 | From openai/rllab. We don't take a dependency on rllab itself because it is
8 | being deprecated and additionally it is difficult to install!!
9 |
10 | This wraps an OpenAI environment generically
11 | """
12 | def __init__(self, wrapped_env):
13 | Serializable.quick_init(self, locals())
14 | self._wrapped_env = wrapped_env
15 |
16 | @property
17 | def spec(self):
18 | return self._wrapped_env.spec
19 |
20 | @property
21 | def wrapped_env(self):
22 | return self._wrapped_env
23 |
24 | def reset(self, **kwargs):
25 | return self._wrapped_env.reset(**kwargs)
26 |
27 | @property
28 | def action_space(self):
29 | return self._wrapped_env.action_space
30 |
31 | @property
32 | def observation_space(self):
33 | return self._wrapped_env.observation_space
34 |
35 | def step(self, action):
36 | return self._wrapped_env.step(action)
37 |
38 | def render(self, *args, **kwargs):
39 | return self._wrapped_env.render(*args, **kwargs)
40 |
41 | def log_diagnostics(self, paths, *args, **kwargs):
42 | self._wrapped_env.log_diagnostics(paths, *args, **kwargs)
43 |
44 | @property
45 | def horizon(self):
46 | return self._wrapped_env.horizon
47 |
48 | def terminate(self):
49 | self._wrapped_env.terminate()
50 |
51 | def get_param_values(self):
52 | return self._wrapped_env.get_param_values()
53 |
54 | def set_param_values(self, params):
55 | self._wrapped_env.set_param_values(params)
56 |
--------------------------------------------------------------------------------
/gym_extensions/wrappers/serializable.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import sys
3 |
4 | class Serializable(object):
5 | """
6 | From openai/rllab. We don't take a dependency on rllab itself because it is
7 | being deprecated and additionally it is difficult to install.
8 |
9 | This is a simple class that makes serialization convenient.
10 | """
11 | def __init__(self, *args, **kwargs):
12 | self.__args = args
13 | self.__kwargs = kwargs
14 |
15 | def quick_init(self, locals_):
16 | if getattr(self, "_serializable_initialized", False):
17 | return
18 | if sys.version_info >= (3, 0):
19 | spec = inspect.getfullargspec(self.__init__)
20 | # Exclude the first "self" parameter
21 | if spec.varkw:
22 | kwargs = locals_[spec.varkw]
23 | else:
24 | kwargs = dict()
25 | else:
26 | spec = inspect.getargspec(self.__init__)
27 | if spec.keywords:
28 | kwargs = locals_[spec.keywords]
29 | else:
30 | kwargs = dict()
31 | if spec.varargs:
32 | varargs = locals_[spec.varargs]
33 | else:
34 | varargs = tuple()
35 | in_order_args = [locals_[arg] for arg in spec.args][1:]
36 | self.__args = tuple(in_order_args) + varargs
37 | self.__kwargs = kwargs
38 | setattr(self, "_serializable_initialized", True)
39 |
40 | def __getstate__(self):
41 | return {"__args": self.__args, "__kwargs": self.__kwargs}
42 |
43 | def __setstate__(self, d):
44 | # convert all __args to keyword-based arguments
45 | if sys.version_info >= (3, 0):
46 | spec = inspect.getfullargspec(self.__init__)
47 | else:
48 | spec = inspect.getargspec(self.__init__)
49 | in_order_args = spec.args[1:]
50 | out = type(self)(**dict(zip(in_order_args, d["__args"]), **d["__kwargs"]))
51 | self.__dict__.update(out.__dict__)
52 |
53 | @classmethod
54 | def clone(cls, obj, **kwargs):
55 | assert isinstance(obj, Serializable)
56 | d = obj.__getstate__()
57 | d["__kwargs"] = dict(d["__kwargs"], **kwargs)
58 | out = type(obj).__new__(type(obj))
59 | out.__setstate__(d)
60 | return out
61 |
--------------------------------------------------------------------------------
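A minimal sketch of the pattern Serializable supports: a class records its constructor arguments via quick_init, so instances can be pickled or cloned with individual kwargs overridden. The example class is purely illustrative:

    import pickle
    from gym_extensions.wrappers.serializable import Serializable

    class ScaledThing(Serializable):
        def __init__(self, scale=1.0, offset=0.0):
            Serializable.quick_init(self, locals())
            self.scale = scale
            self.offset = offset

    thing = ScaledThing(scale=2.0)
    copy_with_offset = Serializable.clone(thing, offset=5.0)  # same scale, new offset
    restored = pickle.loads(pickle.dumps(thing))              # round-trips via __getstate__/__setstate__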
/gym_extensions/wrappers/transformers.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.misc
3 | from gym.spaces.box import Box
4 | from scipy.misc import imresize
5 |
6 | from cached_property import cached_property
7 |
8 |
9 | # TODO: move this to folder with different files
10 |
11 | class BaseTransformer(object):
12 | """
13 | Base transformer interface, inherited objects should conform to this
14 | """
15 |
16 | def transform(self, observation):
17 | """
18 | observation to transform
19 | """
20 | raise NotImplementedError
21 |
22 | def transformed_observation_space(self, prev_observation_space):
23 | """
24 | prev_observation_space and how it is modified
25 | """
26 | return prev_observation_space
27 |
28 | def reset(self):
29 | """
30 | resets the transformer if there is an operation to be made
31 | """
32 | return
33 |
34 | class AppendPrevTimeStepTransformer(BaseTransformer):
35 | """
36 | Keeps track of and appends the previous observation timestep.
37 | """
38 | def __init__(self):
39 | self.prev_timestep = None
40 |
41 | def transform(self, observation):
42 | if self.prev_timestep is None:
43 | self.prev_timestep = np.zeros(observation.shape)
44 |
45 | new_obs = np.concatenate([observation.reshape((1, -1)), self.prev_timestep.reshape((1, -1))], axis=1).reshape(-1)
46 |
47 | self.prev_timestep = observation
48 | return new_obs
49 |
50 | def transformed_observation_space(self, prev_observation_space):
51 | if type(prev_observation_space) is Box:
52 | #TODO: should use tile?
53 | copy = np.copy(prev_observation_space.low.reshape((1, -1)))
54 | low = np.concatenate([copy, copy], axis=1)
55 | copy = np.copy(prev_observation_space.high.reshape((1, -1)))
56 | high = np.concatenate([copy, copy], axis=1)
57 | return Box(low.reshape(-1), high.reshape(-1))
58 | else:
59 | raise NotImplementedError("Currently only support Box observation spaces for AppendPrevTimeStepTransformer")
60 |
61 | return prev_observation_space
62 |
63 | def reset(self):
64 | self.prev_timestep = None
65 |
66 | class SimpleNormalizePixelIntensitiesTransformer(BaseTransformer):
67 | """
68 | Normalizes pixel intensities simply by dividing by 255.
69 | """
70 | def transform(self, observation):
71 | return np.array(observation).astype(np.float32) / 255.0
72 |
73 | def transformed_observation_space(self, wrapped_observation_space):
74 | return wrapped_observation_space
75 |
76 | class ResizeImageTransformer(BaseTransformer):
77 | """
78 | Rescale a given image by a percentage
79 | """
80 |
81 | def __init__(self, fraction_of_current_size):
82 | self.fraction_of_current_size = fraction_of_current_size
83 |
84 | def transform(self, observation):
85 | return scipy.misc.imresize(observation, self.fraction_of_current_size)
86 |
87 | def transformed_observation_space(self, wrapped_observation_space):
88 | if type(wrapped_observation_space) is Box:
89 | return Box(scipy.misc.imresize(wrapped_observation_space.low, self.fraction_of_current_size), scipy.misc.imresize(wrapped_observation_space.high, self.fraction_of_current_size))
90 | else:
91 | raise NotImplementedError("Currently only support Box observation spaces for ResizeImageTransformer")
92 |
93 | class RandomSensorMaskTransformer(BaseTransformer):
94 | """
95 | Randomly occlude a given percentage of sensors on every observation.
96 | Randomly occludes different ones every time
97 | """
98 |
99 | def __init__(self, env, percent_of_sensors_to_occlude=.15):
100 | """
101 | Knock out random sensors
102 | """
103 | self.percent_of_sensors_to_occlude = percent_of_sensors_to_occlude
104 | self.obs_dims = env.observation_space.flat_dim
105 |
106 | def occlude(self, obs):
107 | sensor_idx = np.random.randint(0, self.obs_dims-1, size=int(self.obs_dims * self.percent_of_sensors_to_occlude))
108 | obs[sensor_idx] = 0
109 | return obs
110 |
111 | def transform(self, observation):
112 | return self.occlude(observation)
113 |
--------------------------------------------------------------------------------
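To show where the BaseTransformer hooks fit, here is a sketch of a custom transformer that clips observations (the bound is arbitrary); it can then be passed to ObservationTransformWrapper alongside the built-in ones:

    import numpy as np
    from gym.spaces.box import Box
    from gym_extensions.wrappers.transformers import BaseTransformer

    class ClipObservationTransformer(BaseTransformer):
        """Clip each observation into [-bound, bound] and shrink the space to match."""

        def __init__(self, bound=10.0):
            self.bound = bound

        def transform(self, observation):
            return np.clip(observation, -self.bound, self.bound)

        def transformed_observation_space(self, prev_observation_space):
            if type(prev_observation_space) is Box:
                low = np.maximum(prev_observation_space.low, -self.bound)
                high = np.minimum(prev_observation_space.high, self.bound)
                return Box(low, high)
            raise NotImplementedError("Only Box observation spaces are supported")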
/multitask_benchmarks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/multitask_benchmarks/__init__.py
--------------------------------------------------------------------------------
/multitask_benchmarks/commands_run:
--------------------------------------------------------------------------------
1 | python multitask_benchmarks/run_trpo.py HopperGravityHalf-v0 HopperGravityThreeQuarters-v0 Hopper-v1 HopperGravityOneAndQuarter-v0 HopperGravityOneAndHalf-v0 --num_epochs 500 --batch_size 25000 --text_log_file ./data/hopper_gravity.log --tabular_log_file ./data/hopper_gravity_progress.csv &> debug_log_hopper.log &
2 |
3 | python multitask_benchmarks/run_trpo.py HalfCheetahGravityHalf-v0 HalfCheetahGravityThreeQuarters-v0 HalfCheetah-v1 HalfCheetahGravityOneAndQuarter-v0 HalfCheetahGravityOneAndHalf-v0 --num_epochs 500 --batch_size 25000 --text_log_file ./data/hc_gravity.log --tabular_log_file ./data/hc_gravity_progress.csv &> debug_log_hc.log &
4 |
5 | nohup python multitask_benchmarks/run_trpo.py Walker2dWithSensor-v0 Walker2dWall-v0 --num_epochs 500 --batch_size 25000 --text_log_file ./walker2d_transfer.log --tabular_log_file ./walker2d_transfer.csv &
6 |
7 | nohup python multitask_benchmarks/run_trpo.py HopperWithSensor-v0 HopperWall-v0 --num_epochs 500 --batch_size 25000 --text_log_file ./hopper_transfer.log --tabular_log_file ./hopper_transfer.csv &
8 |
--------------------------------------------------------------------------------
/multitask_benchmarks/run_trpo.py:
--------------------------------------------------------------------------------
1 | from rllab.envs.normalized_env import normalize
2 | from rllab.misc.instrument import stub, run_experiment_lite
3 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
4 | from rllab.envs.gym_env import GymEnv
5 |
6 | from sandbox.rocky.tf.envs.base import TfEnv
7 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
8 | from sandbox.rocky.tf.algos.trpo import TRPO
9 | from rllab.misc import ext
10 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer
11 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import FiniteDifferenceHvp
12 | import rllab.misc.logger as logger
13 | import gym_extensions
14 | import pickle
15 | import os.path as osp
16 | import numpy as np
17 | from multitask_benchmarks.utils import *
18 |
19 | import tensorflow as tf
20 |
21 | import argparse
22 | parser = argparse.ArgumentParser()
23 | parser.add_argument("envs", nargs='+', help="The list of environments to train on in order. Eval rollouts will be run on all environments at the end.")
24 | parser.add_argument("--num_epochs", default=100, type=int, help="Number of epochs to run.")
25 | parser.add_argument("--num_final_rollouts", default=20, type=int, help="Number of rollouts to run on final evaluation of environments.")
26 | parser.add_argument("--batch_size", default=25000, type=int, help="Batch_size per epoch (this is the number of (state, action) samples, not the number of rollouts)")
27 | parser.add_argument("--step_size", default=0.01, type=float, help="Step size for TRPO (i.e. the maximum KL bound)")
28 | parser.add_argument("--reg_coeff", default=1e-5, type=float, help="Regularization coefficient for TRPO")
29 | parser.add_argument("--text_log_file", default="./data/debug.log", help="Where text output will go")
30 | parser.add_argument("--tabular_log_file", default="./data/progress.csv", help="Where tabular output will go")
31 | args = parser.parse_args()
32 |
33 | # stub(globals())
34 |
35 | # ext.set_seed(1)
36 | logger.add_text_output(args.text_log_file)
37 | logger.add_tabular_output(args.tabular_log_file)
38 | logger.set_log_tabular_only(False)
39 |
40 | envs = []
41 |
42 | for env_name in args.envs:
43 | gymenv = GymEnv(env_name, force_reset=True, record_video=False, record_log=False)
44 | env = TfEnv(normalize(gymenv))
45 | envs.append((env_name, env))
46 |
47 | policy = GaussianMLPPolicy(
48 | name="policy",
49 | env_spec=env.spec,
50 | # The neural network policy has three hidden layers of 100, 50 and 25 units.
51 | hidden_sizes=(100, 50, 25),
52 | hidden_nonlinearity=tf.nn.relu,
53 | )
54 |
55 | baseline = LinearFeatureBaseline(env_spec=env.spec)
56 |
57 |
58 | with tf.Session() as sess:
59 | for env_name, env in envs:
60 |
61 | logger.log("Training Policy on %s" % env_name)
62 |
63 | algo = TRPO(
64 | env=env,
65 | policy=policy,
66 | baseline=baseline,
67 | batch_size=args.batch_size,
68 | max_path_length=env.horizon,
69 | n_itr=args.num_epochs,
70 | discount=0.99,
71 | step_size=args.step_size,
72 | optimizer=ConjugateGradientOptimizer(reg_coeff=args.reg_coeff, hvp_approach=FiniteDifferenceHvp(base_eps=args.reg_coeff))
73 | )
74 |
75 | custom_train(algo, sess=sess)
76 |
77 | rollouts = algo.obtain_samples(args.num_epochs + 1)
78 |
79 | logger.log("Average reward for training rollouts on (%s): %f +- %f " % (env_name, np.mean([np.sum(p['rewards']) for p in rollouts]), np.std([np.sum(p['rewards']) for p in rollouts])))
80 |
81 | # Final evaluation on all environments using the learned policy
82 |
83 | total_rollouts = []
84 | for env_name, env in envs:
85 | rollouts = []
86 | for i in range(args.num_final_rollouts):
87 | rollout = rollout_policy(policy, env, max_path_length=env.horizon, speedup=1, get_image_observations=True, animated=True)
88 | rollouts.append(rollout)
89 | total_rollouts.append(rollout)
90 |
91 | logger.log("Average reward for eval rollouts on (%s): %f +- %f " % (env_name, np.mean([np.sum(p['rewards']) for p in rollouts]), np.std([np.sum(p['rewards']) for p in rollouts])))
92 |
93 | logger.log("Total Average across all rollouts and envs: %f +- %f " % (np.mean([np.sum(p['rewards']) for p in total_rollouts]), np.std([np.sum(p['rewards']) for p in total_rollouts])))
94 |
--------------------------------------------------------------------------------
/multitask_benchmarks/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import time
3 | import rllab.misc.logger as logger
4 | import pickle
5 | from rllab.misc import tensor_utils
6 | import numpy as np
7 | from tqdm import tqdm
8 | import copy
9 |
10 | TINY = 1.0e-8
11 |
12 | def initialize_uninitialized(sess):
13 | global_vars = tf.global_variables()
14 | is_not_initialized = sess.run([tf.is_variable_initialized(var) for var in global_vars])
15 | not_initialized_vars = [v for (v, f) in zip(global_vars, is_not_initialized) if not f]
16 |
17 | print([str(i.name) for i in not_initialized_vars]) # only for testing
18 | if len(not_initialized_vars):
19 | sess.run(tf.variables_initializer(not_initialized_vars))
20 |
21 | def custom_train(algo, sess=None):
22 | """
23 | This is necessary so that we don't wipe away already initialized policy params.
24 | Ideally, we should pull request this in as an option to RLlab and remove it from here once done
25 | """
26 | created_session = True if (sess is None) else False
27 | if sess is None:
28 | sess = tf.Session()
29 | sess.__enter__()
30 |
31 | rollout_cache = []
32 | initialize_uninitialized(sess)
33 | algo.start_worker()
34 | start_time = time.time()
35 | for itr in range(algo.start_itr, algo.n_itr):
36 | itr_start_time = time.time()
37 | with logger.prefix('itr #%d | ' % itr):
38 | logger.log("Obtaining samples...")
39 | paths = algo.obtain_samples(itr)
40 | logger.log("Processing samples...")
41 | samples_data = algo.process_samples(itr, paths)
42 | logger.log("Logging diagnostics...")
43 | algo.log_diagnostics(paths)
44 | logger.log("Optimizing policy...")
45 | algo.optimize_policy(itr, samples_data)
46 | logger.log("Saving snapshot...")
47 | params = algo.get_itr_snapshot(itr, samples_data) # , **kwargs)
48 | if algo.store_paths:
49 | params["paths"] = samples_data["paths"]
50 | logger.save_itr_params(itr, params)
51 | logger.log("Saved")
52 | logger.record_tabular('Time', time.time() - start_time)
53 | logger.record_tabular('ItrTime', time.time() - itr_start_time)
54 | logger.dump_tabular(with_prefix=False)
55 |
56 | algo.shutdown_worker()
57 | if created_session:
58 | sess.close()
59 |
60 |
61 | def rollout_policy(agent, env, max_path_length=200, speedup=1, get_image_observations=False, animated=False):
62 | """
63 | Mostly taken from https://github.com/bstadie/third_person_im/blob/master/sandbox/bradly/third_person/algos/cyberpunk_trainer.py#L164
64 | Generate a rollout for a given policy
65 | """
66 | observations = []
67 | im_observations = []
68 | actions = []
69 | rewards = []
70 | agent_infos = []
71 | env_infos = []
72 | o = env.reset()
73 | path_length = 0
74 |
75 | while path_length <= max_path_length:
76 | a, agent_info = agent.get_action(o)
77 | next_o, r, d, env_info = env.step(a)
78 | observations.append(env.observation_space.flatten(o))
79 |
80 | actions.append(env.action_space.flatten(a))
81 | agent_infos.append(agent_info)
82 | env_infos.append(env_info)
83 | path_length += 1
84 | o = next_o
85 | if get_image_observations:
86 | if not animated:
87 | pixel_array = env.render(mode="rgb_array")
88 | else:
89 | pixel_array = env.render()
90 |
91 | if pixel_array is None and not animated:
92 | # Not convinced that behaviour works for all environments, so until
93 | # such a time as I'm convinced of this, drop into a debug shell
94 | print("Problem! Couldn't get pixels! Dropping into debug shell.")
95 | import pdb; pdb.set_trace()
96 | im_observations.append(pixel_array)
97 | if d:
98 | rewards.append(r)
99 | break
100 | else:
101 | rewards.append(r)
102 |
103 | if animated:
104 | env.render(close=True)
105 |
106 | im_observations = tensor_utils.stack_tensor_list(im_observations)
107 | observations = tensor_utils.stack_tensor_list(observations)
108 | rewards = tensor_utils.stack_tensor_list(rewards)
109 |
110 | return dict(
111 | observations=observations,
112 | im_observations=im_observations,
113 | actions=tensor_utils.stack_tensor_list(actions),
114 | rewards=rewards,
115 | agent_infos=tensor_utils.stack_tensor_dict_list(agent_infos),
116 | env_infos=tensor_utils.stack_tensor_dict_list(env_infos),
117 | )
118 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.11
2 | gym[all]>=0.9.2
3 | pyrr
4 | six
5 | cached_property
6 | networkx
7 | scipy>=0.17.0
8 | nose
9 | pygame
10 | scikit-image
11 | matplotlib>=1.3.1
12 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | import sys
3 | from os.path import abspath, join, dirname, realpath
4 |
5 | def read_requirements_file(filename):
6 | req_file_path = '%s/%s' % (dirname(realpath(__file__)), filename)
7 | with open(req_file_path) as f:
8 | return [line.strip() for line in f]
9 |
10 | # Python 2.7 is the minimum version we check for (mujoco-py compatibility); the package itself targets Python 3.5+
11 | if sys.version_info < (2, 7):
12 | print('Sorry, Python < 2.7 is not supported, please install Python 2.7 or 3.5+')
13 | sys.exit()
14 |
15 | setup(name='gym_extensions',
16 | version='0.0.1',
17 | packages=['gym_extensions'],
18 | python_requires='>=3.5',
19 | install_requires=read_requirements_file('requirements.txt'),
20 | # mujoco just released 1.5, for now stick with 1.3
21 | #extras_require={'mujoco': 'mujoco-py<1.50.2,>=1.50.1'},
22 | extras_require={'mujoco': 'mujoco-py<1.50,>=0.5'},
23 | description='Extensions to OpenAI Gym for multitask learning, inverse reinforcement learning, and other peripheral tasks.',
24 | #author='Peter Henderson',
25 | url='https://github.com/Breakend/gym-extensions',
26 | )
27 |
--------------------------------------------------------------------------------
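As a rough installation sketch, `pip install -e .` from the repository root installs the package with the pinned requirements above, and `pip install -e ".[mujoco]"` additionally pulls in the optional mujoco-py dependency declared under extras_require.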