├── .gitignore ├── LICENSE ├── README.md ├── assets ├── Falcon.png └── Mcgill.png ├── docs ├── 2d_navigation_envs.md ├── _config.yml ├── benchmark_algos │ └── trpo.md ├── benchmarks.md ├── discrete_envs.md ├── falcon_rocket.md ├── index.md ├── mujoco_gym_envs.md └── wrappers.md ├── examples ├── EnvironmentGeneration.ipynb ├── image_based_navigation_example.py ├── manual_control_example.py ├── range_based_navigation_example.py └── state_based_navigation_example.py ├── gym_extensions ├── __init__.py ├── continuous │ ├── __init__.py │ ├── box2d │ │ ├── __init__.py │ │ └── falcon_drone_ship_lander.py │ ├── gym_navigation_2d │ │ ├── __init__.py │ │ ├── assets │ │ │ └── worlds_640x480_v0.pkl │ │ ├── env_generator.py │ │ ├── env_utils.py │ │ ├── geometry_utils.py │ │ ├── gym_rendering.py │ │ ├── image_based_navigation.py │ │ ├── range_based_navigation.py │ │ ├── rrt.py │ │ └── test_env_generator.py │ └── mujoco │ │ ├── __init__.py │ │ ├── gravity_envs.py │ │ ├── modified_ant.py │ │ ├── modified_arm.py │ │ ├── modified_half_cheetah.py │ │ ├── modified_hopper.py │ │ ├── modified_humanoid.py │ │ ├── modified_walker2d.py │ │ ├── perturbed_bodypart_env.py │ │ └── wall_envs.py ├── discrete │ ├── __init__.py │ └── classic │ │ ├── __init__.py │ │ ├── cartpole.py │ │ ├── cartpole_contextual.py │ │ └── pendulum_contextual.py ├── tests │ ├── README.md │ ├── __init__.py │ ├── test_contextual_envs.py │ ├── test_import.py │ ├── test_mujoco_import.py │ └── test_wrappers.py └── wrappers │ ├── __init__.py │ ├── normalized_env.py │ ├── observation_transform_wrapper.py │ ├── proxy_env.py │ ├── serializable.py │ └── transformers.py ├── multitask_benchmarks ├── __init__.py ├── commands_run ├── run_trpo.py └── utils.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # swp 71 | # *.swp 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | .venv 89 | venv/ 90 | ENV/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | 105 | data/ 106 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Peter Henderson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | # OpenAI gym 24 | This work is derived from [OpenAI Gym](https://github.com/openai/gym) used under the MIT License. 25 | 26 | # Mujoco models 27 | This work is derived from [MuJoCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license: 28 | ``` 29 | This file is part of MuJoCo. 30 | Copyright 2009-2015 Roboti LLC. 31 | Mujoco :: Advanced physics simulation engine 32 | Source : www.roboti.us 33 | Version : 1.31 34 | Released : 23Apr16 35 | Author :: Vikash Kumar 36 | Contacts : kumar@roboti.us 37 | ``` 38 | 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # gym-extensions 4 | This python package is an extension to OpenAI Gym for auxiliary tasks (multitask learning, transfer learning, inverse reinforcement learning, etc.) 
5 | 6 | 7 | ## Dependencies 8 | 9 | - [Python 3.5.2](https://www.python.org/) 10 | - [OpenAI Gym](https://gym.openai.com/) 11 | - [MuJoCo](http://mujoco.org/) (Optional) 12 | - [mujoco-py](https://github.com/openai/mujoco-py#install-mujoco) (Optional) 13 | - [roboschool](https://github.com/openai/roboschool) (Optional) 14 | 15 | ## Installation 16 | 17 | Check out the latest code: 18 | ```bash 19 | git clone https://github.com/Breakend/gym-extensions-multitask.git 20 | ``` 21 | 22 | ### Install without MuJoCo Support 23 | ```bash 24 | pip3 install -e . 25 | ``` 26 | 27 | ### Install with MuJoCo Support 28 | Install MuJoCo according to [mujoco-py](https://github.com/openai/mujoco-py#install-mujoco). 29 | 1. Obtain a license for [MuJoCo](http://mujoco.org/) 30 | 2. Download the MuJoCo 1.50 binaries 31 | 3. Unzip them into the `mjpro150` directory at `~/.mujoco/mjpro150` and place the license 32 | at `~/.mujoco/mjkey.txt` 33 | 4. Finally, install gym-extensions with mujoco-py enabled: 34 | 35 | ```bash 36 | pip3 install -e .[mujoco] 37 | ``` 38 | 39 | ### Test Installation 40 | ``` 41 | nosetests -v gym_extensions 42 | ``` 43 | 44 | ### Possible Issues 45 | 46 | Due to the dependency on OpenAI Gym, you may have some trouble installing gym on macOS. To remedy this: 47 | 48 | ```bash 49 | # as per: https://github.com/openai/gym/issues/164 50 | export MACOSX_DEPLOYMENT_TARGET=10.12; pip install -e . 51 | ``` 52 | 53 | Also, if you get the following error: 54 | ``` 55 | >>> import matplotlib.pyplot 56 | Traceback (most recent call last): 57 | File "", line 1, in 58 | File "~/anaconda2/lib/python2.7/site-packages/matplotlib/pyplot.py", line 115, in 59 | _backend_mod, new_figure_manager, draw_if_interactive, _show = pylab_setup() 60 | File "~/anaconda2/lib/python2.7/site-packages/matplotlib/backends/__init__.py", line 32, in pylab_setup 61 | globals(),locals(),[backend_name],0) 62 | File "~/anaconda2/lib/python2.7/site-packages/matplotlib/backends/backend_gtk.py", line 19, in 63 | raise ImportError("Gtk* backend requires pygtk to be installed.") 64 | ImportError: Gtk* backend requires pygtk to be installed. 65 | ``` 66 | the easiest fix is to switch matplotlib backends. You can do this by setting `backend: TkAgg` in `~/.config/matplotlib/matplotlibrc` or `~/.matplotlib/matplotlibrc`. 67 | 68 | ## Usage 69 | 70 | To register specific sets of environments (you don't necessarily want to import the whole project): 71 | 72 | ```python 73 | from gym_extensions.continuous import gym_navigation_2d 74 | env = gym.make("State-Based-Navigation-2d-Map1-Goal1-v0") 75 | ``` 76 | 77 | ```python 78 | from gym_extensions.continuous import mujoco 79 | env = gym.make("HopperWall-v0") 80 | ``` 81 | 82 | 83 | ## More info 84 | 85 | More information will be provided on our doc website: https://breakend.github.io/gym-extensions/ 86 | 87 | ## Contributions 88 | 89 | To contribute environments, please use the general directory structure we have in place and submit **pull requests**. We're still working on making this extension to OpenAI Gym the best it can be, so things may change. Any changes to existing environments should involve an incremental update to the environment name (e.g. Hopper-v0 vs. Hopper-v1). If you are not associated with McGill and contribute significantly, please add your association to: 90 | 91 |
`docs/index.md`
92 | 93 | ## References 94 | 95 | Some of this work borrowed ideas and code from OpenAI rllab and OpenAI Gym. We thank those creators for their work and cite links to reference code inline where possible. 96 | 97 | ## Contributors 98 | 99 | Here's a list of contributors! 100 | 101 | + @Breakend 102 | + @weidi-chang 103 | + @florianshkurti 104 | + @johannah 105 | + @vBarbaros 106 | + @mklissa 107 | 108 | ## Who's using this? 109 | 110 | Works that have used this framework include: 111 | 112 | ``` 113 | Klissarov, Martin, Pierre-Luc Bacon, Jean Harb, and Doina Precup. "Learnings Options End-to-End for Continuous Action Tasks." arXiv preprint arXiv:1712.00004 (2017). 114 | 115 | Henderson, Peter, Wei-Di Chang, Pierre-Luc Bacon, David Meger, Joelle Pineau, and Doina Precup. "OptionGAN: Learning Joint Reward-Policy Options using Generative Adversarial Inverse Reinforcement Learning." arXiv preprint arXiv:1709.06683 (2017). 116 | ``` 117 | 118 | ## Citation for this work! 119 | 120 | If you use this work please use the following citation. If using the Space X environment, please also reference @vBarbaros for credit. 121 | 122 | ``` 123 | @article{henderson2017multitask, 124 | author = {{Henderson}, P. and {Chang}, W.-D. and {Shkurti}, F. and {Hansen}, J. and 125 | {Meger}, D. and {Dudek}, G.}, 126 | title = {Benchmark Environments for Multitask Learning in Continuous Domains}, 127 | journal = {ICML Lifelong Learning: A Reinforcement Learning Approach Workshop}, 128 | year={2017} 129 | } 130 | ``` 131 | -------------------------------------------------------------------------------- /assets/Falcon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/assets/Falcon.png -------------------------------------------------------------------------------- /assets/Mcgill.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/assets/Mcgill.png -------------------------------------------------------------------------------- /docs/2d_navigation_envs.md: -------------------------------------------------------------------------------- 1 | ## 2D continuous navigation envs 2 | 3 | We also provide several novel 2D environments that focus on navigation tasks with continuous action spaces to enable benchmarking of learning tasks requiring an implicit memory. The tasks take place in a given occupancy grid map. We opt to make the layout and shape of the obstacles as the only disambiguating feature for localizing within the map. Aside from that information, the environment does not have any texture mapping or other distinctive features. 4 | We provide three different types of navigation tasks, increasing in level of difficulty: 5 | 6 | + Image-based navigation where the agent has access to the entire map, including its own position within the map and the destination in the map as part of the image data. 7 | + State-based navigation, where the agent has access to its own position in the map and the distance and bearing to the closest obstacle. A simpler version also contains the destination coordinates. 8 | + Navigation based only on local range-and-bearing data around the agent using ray-tracing. It has to perform mapping and estimate its own position within the map (i.e. 
perform SLAM), while at the same time exploring to find the goal location and learning to avoid obstacles. We also provide a simpler version in which the goal and current position are known as well. 9 | 10 | We provide a reward of -1 for every timestep, -5 for obstacle collisions, and +10 for reaching the goal (which also ends the task, similarly to the MountainCar-v0 environment in OpenAI Gym). The action space is the bounded velocity to apply in the x and y directions. 11 | 12 | **Environments** 13 | 14 | + Learning to navigate and search in 2D environments observing only raytracing distance readouts (Limited-Range-Based-Navigation-2d-Map{0-9}-Goal{0-2}-v0) 15 | + Learning to navigate and search in 2D environments observing current position, raytracing distance readouts, and known goal position (Limited-Range-Based-Navigation-2d-Map{0-9}-Goal{0-2}-KnownPositions-v0) 16 | + Learning to navigate and search in 2D environments observing only the 2D map image, with the goal location and current position highlighted in different colors (Image-Based-Navigation-2d-Map{0-9}-Goal{0-2}-v0) 17 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman 2 | show_downloads: true 3 | markdown: kramdown 4 | -------------------------------------------------------------------------------- /docs/benchmarks.md: -------------------------------------------------------------------------------- 1 | # Benchmarks! 2 | 3 | Part of our goal in releasing these environments is for people to upload their algorithms' benchmark results. In that spirit, we start off with some simple TRPO results. First, we define several task groupings which may be useful to run together. Then we provide some results from running TRPO sequentially across these groupings. While these aren't the only combinations, task groupings, and experiments that can be run with these environments, our results highlight some fundamental problems in lifelong and multitask learning (such as catastrophic forgetting, information generalization, etc.). 4 | 5 | ## Possible Task Groupings 6 | 7 | ## Benchmark Results 8 | 9 | + [TRPO](benchmark_algos/trpo) 10 | 11 | ## Uploading Algorithm Results 12 | 13 | To upload your algorithms' results, please send a pull request with a doc page as in the *docs/benchmark_algos/trpo* folder. 14 | 15 | We require three sections: 16 | 17 | 1. Algorithm description and link to code (also include hyperparameters used) 18 | 2. Task Groupings and Experimental Setup (number of trials, sequential vs. simultaneous learning, etc.) 19 | 3. Results (Average Reward, Std. Deviation of reward, number of trials, etc. on each environment in the grouping) 20 | -------------------------------------------------------------------------------- /docs/discrete_envs.md: -------------------------------------------------------------------------------- 1 | 2 | ## Discrete Envs 3 | 4 | We are adding support for discrete envs that are customizable for simple transfer learning tasks. An example is: 5 | 6 | ```python 7 | gym_extensions.discrete.classic.cartpole.register_custom_cartpole("CartpoleLongPole-v0", gravity=9.8, masscart=1.0, masspole=0.1, pole_length=1.0, force_mag=10.0) 8 | ``` 9 | 10 | You can then instantiate the new environment with `gym.make("CartpoleLongPole-v0")`.
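For example, here is a minimal end-to-end sketch; the keyword values simply mirror the registration call above, and the rollout assumes the classic `obs, reward, done, info` Gym step API used by the examples in this repository:

```python
import gym
from gym_extensions.discrete.classic import cartpole

# Register a long-pole variant (same call as above), then run one random-action episode.
cartpole.register_custom_cartpole("CartpoleLongPole-v0", gravity=9.8, masscart=1.0,
                                  masspole=0.1, pole_length=1.0, force_mag=10.0)

env = gym.make("CartpoleLongPole-v0")
observation = env.reset()
done = False
total_reward = 0.0
while not done:
    observation, reward, done, info = env.step(env.action_space.sample())
    total_reward += reward
print("Episode return: {}".format(total_reward))
```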
11 | 12 | While we want to support these custom envs, the main goal of this project is to come up with a standard set of added tasks which can be used in the context of multitask or lifelong learning, particularly in continuous domains. To that end, we provide the following. 13 | -------------------------------------------------------------------------------- /docs/falcon_rocket.md: -------------------------------------------------------------------------------- 1 | ## SpaceX-like Falcon Rocket 2 | 3 | This Falcon rocket lander environment encourages landing on an autonomous floating drone ship. It uses part of the LunarLander environment (from OpenAI Gym) as a base and adds elements that make the task more difficult and interesting, as described below. 4 | 5 | The state now contains 13 dimensions, some of which describe the drone ship. The difficulty of the task can be adjusted in many ways (e.g., giving more or less power to the rocket's engines, or increasing/decreasing the force that pushes the drone ship around so that it oscillates from left to right at different velocities). 6 | 7 | The reward function accounts for the Falcon rocket landing on both legs (otherwise it's not quite a landing) and for the angle and velocity of the drone ship (the faster and more unstable the drone ship, the higher the reward). Episodes end once the rocket has landed or if the rocket goes below the level of the drone ship. Longer episodes are also penalized, i.e., more time-steps before a successful landing mean a lower reward. 8 | 9 | You can also land the SpaceX Falcon rocket from the keyboard. 10 | 11 | [![Take a look at it here ](https://github.com/vBarbaros/gym-extensions/raw/master/assets/Falcon.png)](https://www.youtube.com/watch?v=GpHylnSox_Q&feature=youtu.be "Hope you'll like it!") 12 | 13 | The source code is at `gym_extensions/continuous/box2d/falcon_drone_ship_lander.py`. -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | Welcome to **gym-extensions**! We're building on top of OpenAI Gym in the pursuit of creating environments suitable for benchmarking a wide variety of auxiliary tasks. On the roadmap right now are: 2 | 3 | + Multitask, Transfer, and LifeLong Learning Benchmarks 4 | + Learning to navigate and search tasks 5 | + More realistic robotics tasks integrated with ROS and Gazebo 6 | 7 | There's a lot of info to go over! 8 | 9 | + You can read about the modified Mujoco OpenAI continuous domain tasks [here](mujoco_gym_envs) 10 | + You can read about the new 2D navigation continuous domain tasks [here](2d_navigation_envs) 11 | + We also provide a set of wrappers to process agent observations, as described [here](wrappers) 12 | + Finally, you can enjoy a brand new SpaceX-like Falcon rocket that can be taught to land on an autonomous drone ship [here](falcon_rocket) 13 | 14 | We want all the envs and pull requests we can get! Open an issue if you're confused and we'll fix it ASAP.
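If you want to jump straight in, here is a minimal sketch mirroring the usage snippets in the README; importing a submodule registers its environments, and the 2D navigation envs do not require a MuJoCo license:

```python
import gym
from gym_extensions.continuous import gym_navigation_2d  # importing registers the 2D navigation envs

env = gym.make("State-Based-Navigation-2d-Map1-Goal1-v0")
observation = env.reset()
observation, reward, done, info = env.step(env.action_space.sample())
```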
15 | 16 | In progress documentation: 17 | 18 | + A website for benchmarked multitask runs 19 | + Pictures and videos 20 | + A nicer layout 21 | + How to create new environments and contribute 22 | 23 | ### Brought to you by 24 | 25 | ![mcgill-logo](https://github.com/Breakend/gym-extensions/raw/master/assets/Mcgill.png){:width="30%" class="img-responsive"} 26 | -------------------------------------------------------------------------------- /docs/mujoco_gym_envs.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | 20 | 21 | ## Continuous Mujoco Modified OpenAI Gym Environments 22 | 23 | ### Modified Gravity 24 | 25 | For the running agents, we provide ready environments with various scales of simulated earth-like gravity, ranging from one half to one and a half of the normal gravity level (−4.91 to −12.26 $$m\cdot s^{-2}$$ in increments of $$.25\cdot g$$ earth gravity). We propose that a successful multitask learning algorithm will extract the underlying walking action structure and reuse the applicable knowledge without forgetting how to walk in varying gravity conditions. 26 | 27 | **HopperGravityHalf-v0** 28 | 29 | The standard Mujoco OpenAI gym hopper task with gravity scaled by 0.5 30 | 31 | **HopperGravityThreeQuarters-v0** 32 | 33 | The standard Mujoco OpenAI gym hopper task with gravity scaled by 0.75 34 | 35 | **HopperGravityOneAndQuarter-v0** 36 | 37 | The standard Mujoco OpenAI gym hopper task with gravity scaled by 1.25 38 | 39 | **HopperGravityOneAndHalf-v0** 40 | 41 | The standard Mujoco OpenAI gym hopper task with gravity scaled by 1.5 42 | 43 | **Similarly:** 44 | 45 | + Walker2dGravityHalf-v0 46 | + Walker2dGravityThreeQuarters-v0 47 | + Walker2dGravityOneAndQuarter-v0 48 | + Walker2dGravityOneAndHalf-v0 49 | + HalfCheetahGravityHalf-v0 50 | + HalfCheetahGravityThreeQuarters-v0 51 | + HalfCheetahGravityOneAndQuarter-v0 52 | + HalfCheetahGravityOneAndHalf-v0 53 | + HumanoidGravityHalf-v0 54 | + HumanoidGravityThreeQuarters-v0 55 | + HumanoidGravityOneAndQuarter-v0 56 | + HumanoidGravityOneAndHalf-v0 57 | 58 | ### Humanoid Extended Tasks 59 | 60 | We provide a humanoid multitask environment which combines the rewards for standing up and running in a single environment. The reward scale for this task is rather large, but aligns with the HumanoidStandup-v1 environment from OpenAI Gym. Additionally we provide a version of each environment with a sensor readout. When no wall is used, all sensors read zero. When a wall is used, each returns a distance to the wall as previously described. 61 | 62 | **HumanoidStandupAndRun-v0** 63 | 64 | This task rewards standing up and running. Combines the reward functions from HumanoidStandup and Humanoid. 65 | 66 | ### Random Walls and Sensors 67 | 68 | Inspired by the wall jumping experiment in (Finn et al., 2016), we build a set of similar environments by extending the OpenAI running tasks to use a multi-beam noiseless range sensor. 69 | 70 | Ray-beams are emitted from the torso of the runner to act as sensor readings. We also provide the usual running tasks with no walls and the sensor perception enabled so that an agent can first be trained with the larger observation vector before being run or trained on the environments with a wall set in the path of the agent. 71 | 72 | **Walker2dWithSensor-v0** 73 | 74 | This task is identical to the Walker2d environment in OpenAI gym, but with empty sensor readouts (10 extra observations by default) which would detect a wall if it were there. 
This is to keep the agent's state space the same dimensionality for transfer learning to a Wall environment. 75 | 76 | **Walker2dWall-v0** 77 | 78 | In this task, the agent is equipped with a noiseless ray tracing 10 beam sensor in an arc of 90 degrees and a maximum sensing distance of 10 meters, with readouts normalized to a range of [0, 1] to make it a percentage of the overall maximum ray detection distance. A wall is randomly placed in the path of the agent, at a location drawn from a uniform distribution from 1.8 to 3.8 meters ahead of the agent’s start location. 79 | 80 | **Similarly:** 81 | 82 | + HopperWithSensor-v0 83 | + HopperWall-v0 84 | + HalfCheetahWithSensor-v0 85 | + HalfCheetahWall-v0 86 | + Walker2dWithSensor-v0 87 | + Walker2dWall-v0 88 | + HumanoidWithSensor-v0 89 | + HumanoidWall-v0 90 | + HumanoidStandupWithSensor-v0 91 | + HumanoidStandupAndRunWithSensor-v0 92 | + HumanoidStandupAndRunWall-v0 93 | 94 | ### Arm Environments 95 | 96 | In the OpenAI Striker and Pusher tasks, a 7 DoF arm tries to hit a ball into a hole or push a peg to a goal position respectively. We extend these tasks to randomly move the goal position for the Pusher task, and randomly move the ball start position for the Striker task. As in the original tasks, we bound the varied goal or start state within some restricted uniform distribution as domain appropriate. 97 | 98 | **StrikerMovingStart-v0** 99 | 100 | The Striker-v0 env from OpenAI gym, but with a moving starting position as well as goal. 101 | 102 | **PusherMovingGoal-v0** 103 | 104 | The Pusher-v0 env from OpenAI gym, but with a moving goal position as well as start position. 105 | 106 | ### Morphological Modifications 107 | 108 | For the running agents, we provide environments which vary the morphology of a specific body part of the agent. We define “Big” bodyparts as scaling the mass and width of the limb by 1.25 and “Small” bodyparts as being scaled by 0.75. We also group categories of limbs for environments with multiple appendages (i.e. humanoid torso includes the abdomen; humanoid thigh also includes the hips; all appendages encompass both the left/right or front/back simultaneously such that a modified thigh includes both thighs). 
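As with the other modified environments, importing the MuJoCo submodule registers every variant listed below, so each one can be created like any other Gym environment. A minimal sketch (assuming the optional MuJoCo/mujoco-py install; the particular grouping chosen here is only illustrative):

```python
import gym
from gym_extensions.continuous import mujoco  # importing registers the modified envs

# Cycle through a small morphology grouping, e.g. for sequential multitask training.
for env_id in ["HopperSmallFoot-v0", "HopperBigFoot-v0", "HopperBigTorso-v0"]:
    env = gym.make(env_id)
    observation = env.reset()
    observation, reward, done, info = env.step(env.action_space.sample())
    env.close()
```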
109 | 110 | **Envs**: 111 | 112 | + HopperSmallFoot-v0 113 | + HopperSmallLeg-v0 114 | + HopperSmallThigh-v0 115 | + HopperSmallTorso-v0 116 | + HopperBigFoot-v0 117 | + HopperBigLeg-v0 118 | + HopperBigThigh-v0 119 | + HopperBigTorso-v0 120 | + Walker2dSmallFoot-v0 121 | + Walker2dSmallLeg-v0 122 | + Walker2dSmallThigh-v0 123 | + Walker2dSmallTorso-v0 124 | + Walker2dBigFoot-v0 125 | + Walker2dBigLeg-v0 126 | + Walker2dBigThigh-v0 127 | + Walker2dBigTorso-v0 128 | + HalfCheetahSmallFoot-v0 129 | + HalfCheetahSmallLeg-v0 130 | + HalfCheetahSmallThigh-v0 131 | + HalfCheetahSmallTorso-v0 132 | + HalfCheetahBigFoot-v0 133 | + HalfCheetahBigLeg-v0 134 | + HalfCheetahBigThigh-v0 135 | + HalfCheetahBigTorso-v0 136 | + HumanoidSmallFoot-v0 137 | + HumanoidSmallLeg-v0 138 | + HumanoidSmallThigh-v0 139 | + HumanoidSmallTorso-v0 140 | + HumanoidBigFoot-v0 141 | + HumanoidBigLeg-v0 142 | + HumanoidBigThigh-v0 143 | + HumanoidBigTorso-v0 144 | + HumanoidSmallHead-v0 145 | + HumanoidBigHead-v0 146 | + HumanoidSmallArm-v0 147 | + HumanoidBigArm-v0 148 | + HumanoidSmallHand-v0 149 | + HumanoidBigHand-v0 150 | -------------------------------------------------------------------------------- /docs/wrappers.md: -------------------------------------------------------------------------------- 1 | ## Wrappers 2 | 3 | We provide a set of wrappers based on rllab to wrap OpenAI Gym environments, these typically will filter observations, etc. 4 | 5 | These wrappers require rllab as a dependency, but we're working on removing this dependency. Please submit a Pull Request if you want it done sooner! 6 | 7 | ### Current Wrappers 8 | 9 | We only provide one wrapper for now: 10 | 11 | **ObservationTransformWrapper** - Provides a list of transformers and runs all observations through the transformers. 
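The example at the bottom of this page shows the wrapper in use; for completeness, here it is again with explicit imports. The import paths are inferred from the repository layout (`gym_extensions/wrappers/`), so treat them as an assumption rather than a documented API:

```python
import gym

# Import paths inferred from gym_extensions/wrappers/*.py -- an assumption, not a documented API.
from gym_extensions.wrappers.observation_transform_wrapper import ObservationTransformWrapper
from gym_extensions.wrappers.transformers import (SimpleNormalizePixelIntensitiesTransformer,
                                                  ResizeImageTransformer)

gymenv = gym.make("Hopper-v1")
transformers = [SimpleNormalizePixelIntensitiesTransformer(),
                ResizeImageTransformer(fraction_of_current_size=.35)]
transformed_env = ObservationTransformWrapper(gymenv, transformers)
observation = transformed_env.reset()  # observations now pass through each transformer in order
```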
12 | 13 | ### Current Transformers 14 | 15 | **SimpleNormalizePixelIntensitiesTransformer** - Normalizes pixels (divide by 255) 16 | 17 | **ResizeImageTransformer** - Resizes an image by a given percentage 18 | 19 | **RandomSensorMaskTransformer** - Randomly occludes some given percentage of the observations at every timestep 20 | 21 | ### Example Usage 22 | 23 | ```python 24 | gymenv = gym.make("Hopper-v1") 25 | transformers = [SimpleNormalizePixelIntensitiesTransformer(), ResizeImageTransformer(fraction_of_current_size=.35)] 26 | transformed_env = ObservationTransformWrapper(gymenv, transformers) 27 | ``` 28 | -------------------------------------------------------------------------------- /examples/image_based_navigation_example.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import time 4 | from gym_extensions.continuous import gym_navigation_2d 5 | import matplotlib.pyplot as plt 6 | 7 | env = gym.make('Image-Based-Navigation-2d-Map0-Goal0-v0') 8 | observation = env.reset() 9 | img = plt.imshow(observation, interpolation="None", origin='lower') 10 | 11 | for t in range(500): 12 | env.render() 13 | action = env.action_space.sample() 14 | observation, reward, done, info = env.step(np.array([1.,1.])) 15 | 16 | if not t%5: 17 | img.set_data(observation) 18 | plt.pause(1e-15) 19 | if done: 20 | print("Episode finished after {} timesteps".format(t+1)) 21 | break 22 | -------------------------------------------------------------------------------- /examples/manual_control_example.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import gym.spaces 3 | import gym.utils 4 | import gym.utils.seeding 5 | import numpy as np 6 | from gym_extensions.continuous import mujoco 7 | import sys 8 | try: 9 | from gym.envs.mujoco.mujoco_env import MujocoEnv 10 | except: 11 | print("This example depends on mujoco, see README.md for information on installing.") 12 | sys.exit() 13 | try: 14 | import roboschool 15 | from roboschool.gym_mujoco_xml_env import RoboschoolMujocoXmlEnv 16 | except: 17 | print("""This example depends on roboschool. 
To install roboschool, see instructions at: 18 | https://github.com/openai/roboschool""") 19 | sys.exit() 20 | 21 | # From https://raw.githubusercontent.com/openai/roboschool/master/roboschool/test_manual.py 22 | # Run this file to test environments using manual control: 23 | # python test_manual.py RoboschoolHopper-v0 24 | 25 | class TestKeyboardControl: 26 | def __init__(self): 27 | self.keys = {} 28 | self.control = np.zeros(9) 29 | self.human_pause = False 30 | self.human_done = False 31 | def key(self, event_type, key, modifiers): 32 | self.keys[key] = +1 if event_type==6 else 0 33 | #print ("event_type", event_type, "key", key, "modifiers", modifiers) 34 | self.control[0] = self.keys.get(0x1000014, 0) - self.keys.get(0x1000012, 0) 35 | self.control[1] = self.keys.get(0x1000013, 0) - self.keys.get(0x1000015, 0) 36 | self.control[2] = self.keys.get(ord('A'), 0) - self.keys.get(ord('Z'), 0) 37 | self.control[3] = self.keys.get(ord('S'), 0) - self.keys.get(ord('X'), 0) 38 | self.control[4] = self.keys.get(ord('D'), 0) - self.keys.get(ord('C'), 0) 39 | self.control[5] = self.keys.get(ord('F'), 0) - self.keys.get(ord('V'), 0) 40 | self.control[6] = self.keys.get(ord('G'), 0) - self.keys.get(ord('B'), 0) 41 | self.control[7] = self.keys.get(ord('H'), 0) - self.keys.get(ord('N'), 0) 42 | self.control[8] = self.keys.get(ord('J'), 0) - self.keys.get(ord('M'), 0) 43 | if event_type==6 and key==32: # press Space to pause 44 | self.human_pause = 1 - self.human_pause 45 | if event_type==6 and key==0x1000004: # press Enter to restart 46 | self.human_done = True 47 | 48 | 49 | class TestKeyboardControlMuj: 50 | def __init__(self): 51 | self.keys = {} 52 | self.control = np.zeros(9) 53 | self.human_pause = False 54 | self.human_done = False 55 | 56 | def key(self, window, key, scancode, event_type, modifiers): 57 | self.keys[key] = +1 if event_type==1 else 0 58 | # print(key) 59 | #print ("event_type", event_type, "key", key, "modifiers", modifiers) 60 | self.control[0] = self.keys.get(265, 0) - self.keys.get(264, 0) 61 | self.control[1] = self.keys.get(262, 0) - self.keys.get(263, 0) 62 | self.control[2] = self.keys.get(ord('A'), 0) - self.keys.get(ord('Z'), 0) 63 | self.control[3] = self.keys.get(ord('S'), 0) - self.keys.get(ord('X'), 0) 64 | self.control[4] = self.keys.get(ord('D'), 0) - self.keys.get(ord('C'), 0) 65 | self.control[5] = self.keys.get(ord('F'), 0) - self.keys.get(ord('V'), 0) 66 | self.control[6] = self.keys.get(ord('G'), 0) - self.keys.get(ord('B'), 0) 67 | self.control[7] = self.keys.get(ord('H'), 0) - self.keys.get(ord('N'), 0) 68 | self.control[8] = self.keys.get(ord('J'), 0) - self.keys.get(ord('M'), 0) 69 | if event_type==1 and key==32: # press Space to pause 70 | self.human_pause = 1 - self.human_pause 71 | if event_type==1 and key==257: # press Enter to restart 72 | self.human_done = True 73 | 74 | 75 | usage = """ 76 | This is manual test. 
Usage: 77 | %s 78 | 79 | Keyboard shortcuts: 80 | * F1 toggle slow motion 81 | * F2 toggle captions 82 | * F3 toggle HUD: observations, actions, reward 83 | * ENTER to restart episode (works only in this test) 84 | * SPACE to pause (works only in this test) 85 | * Up/down, left/right, a/z, s/x, d/c, f/v, g/b, h/n, j/m to control robot (works only in this test) 86 | """ 87 | 88 | def test(env_id): 89 | print(usage % sys.argv[0]) 90 | env = gym.make(env_id) 91 | # import pdb; pdb.set_trace() 92 | env.reset() # This creates default single player scene 93 | if isinstance(env.unwrapped, MujocoEnv): 94 | ctrl = TestKeyboardControlMuj() 95 | from mujoco_py.glfw import set_key_callback 96 | set_key_callback(env.unwrapped._get_viewer().window, ctrl.key) 97 | elif isinstance(env.unwrapped, RoboschoolMujocoXmlEnv): 98 | ctrl = TestKeyboardControl() 99 | env.unwrapped.scene.cpp_world.set_key_callback(ctrl.key) 100 | if "camera" in env.unwrapped.__dict__: 101 | env.unwrapped.camera.set_key_callback(ctrl.key) 102 | else: 103 | raise NotImplementedError 104 | 105 | a = np.zeros(env.action_space.shape) 106 | copy_n = min(len(a), len(ctrl.control)) 107 | ctrl.human_pause = False 108 | 109 | while 1: 110 | ctrl.human_done = False 111 | sn = env.reset() 112 | frame = 0 113 | reward = 0.0 114 | episode_over = False 115 | while 1: 116 | s = sn 117 | a[:copy_n] = ctrl.control[:copy_n] 118 | # import pdb; pdb.set_trace() 119 | sn, rplus, done, info = env.step(a) 120 | reward += rplus 121 | #env.render("rgb_array") 122 | episode_over |= done 123 | still_visible = True 124 | # import pdb; pdb.set_trace() 125 | while True: 126 | env.render("human") 127 | #env.unwrapped.camera.test_window() 128 | if not ctrl.human_pause: break 129 | if ctrl.human_done: break 130 | if not still_visible: break 131 | frame += 1 132 | if not still_visible: break 133 | 134 | if __name__=="__main__": 135 | env_id = "RoboschoolHumanoid-v0" if len(sys.argv) <= 1 else sys.argv[1] 136 | test(env_id) 137 | -------------------------------------------------------------------------------- /examples/range_based_navigation_example.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | from gym_extensions.continuous.gym_navigation_2d.env_generator import EnvironmentGenerator, Environment, EnvironmentCollection, Obstacle 4 | import numpy as np 5 | import time 6 | 7 | env = gym.make('Limited-Range-Based-Navigation-2d-Map1-Goal1-v0') 8 | 9 | observation = env.reset() 10 | dt1 = [] 11 | dt2 = [] 12 | 13 | for t in range(100): 14 | env.render() 15 | 16 | start = time.time() 17 | action = env.action_space.sample() 18 | end = time.time() 19 | 20 | dt1.append(end-start) 21 | 22 | start = time.time() 23 | observation, reward, done, info = env.step(action) 24 | end = time.time() 25 | 26 | dt2.append(end-start) 27 | 28 | if done: 29 | print("Episode finished after {} timesteps".format(t+1)) 30 | break 31 | 32 | print( sum(dt1)/len(dt1), sum(dt2)/len(dt2)) 33 | -------------------------------------------------------------------------------- /examples/state_based_navigation_example.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | from gym_extensions.continuous.gym_navigation_2d.env_generator import Environment, EnvironmentCollection, Obstacle 4 | import numpy as np 5 | import time 6 | import timeit 7 | 8 | env = gym.make('State-Based-Navigation-2d-Map0-Goal2-v0') 9 | 10 | observation = env.reset() 11 | dt1 = [] 12 | dt2 = [] 13 | for t in range(300): 
14 | env.render() 15 | 16 | start = time.time() 17 | action = env.action_space.sample() 18 | end = time.time() 19 | 20 | dt1.append(end - start) 21 | 22 | start = time.time() 23 | observation, reward, done, info = env.step(action) 24 | end = time.time() 25 | 26 | dt2.append(end - start) 27 | 28 | 29 | if done: 30 | print("Episode finished after {} timesteps".format(t+1)) 31 | break 32 | 33 | print( sum(dt1)/len(dt1), sum(dt2)/len(dt2)) 34 | -------------------------------------------------------------------------------- /gym_extensions/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | if sys.version_info < (2,7): 3 | sys.exit('Sorry, Python < 2.7 is not supported, install Python 3.5.2') 4 | -------------------------------------------------------------------------------- /gym_extensions/continuous/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/gym_extensions/continuous/__init__.py -------------------------------------------------------------------------------- /gym_extensions/continuous/box2d/__init__.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import os 3 | 4 | custom_envs = { 5 | # Contextual environments 6 | "FalconLander-v0": 7 | dict(path='gym_extensions.continuous.box2d.falcon_drone_ship_lander:FalconLander', 8 | max_episode_steps=1000, 9 | kwargs= dict()) 10 | } 11 | 12 | def register_custom_envs(): 13 | for key, value in custom_envs.items(): 14 | arg_dict = dict(id=key, 15 | entry_point=value["path"], 16 | max_episode_steps=value["max_episode_steps"], 17 | kwargs=value["kwargs"]) 18 | 19 | if "reward_threshold" in value: 20 | arg_dict["reward_threshold"] = value["reward_threshold"] 21 | 22 | gym.envs.register(**arg_dict) 23 | 24 | register_custom_envs() 25 | -------------------------------------------------------------------------------- /gym_extensions/continuous/box2d/falcon_drone_ship_lander.py: -------------------------------------------------------------------------------- 1 | import sys, math 2 | import numpy as np 3 | import pyglet 4 | 5 | import Box2D 6 | from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener) 7 | import pygame 8 | from pygame.locals import K_RIGHT, K_LEFT, KEYDOWN, KEYUP 9 | 10 | import gym 11 | from gym import spaces 12 | from gym.utils import seeding 13 | 14 | 15 | # Inspired by the existing OpenAI Gym LunarLander-v0 environment, as well as by Elon Musk's SpaceX crazy projects ! 
16 | __author__ = "Victor Barbaros" 17 | __credits__ = ["OpenAi Gym", "Oleg Klimov"] 18 | __version__ = "0.0.1" 19 | __maintainer__ = "Victor Barbaros" 20 | __github_username__ = "vBarbaros" 21 | 22 | FPS = 60 23 | SCALE = 30.0 #default:30 affects how fast-paced the game is, forces should be adjusted as well 24 | 25 | MAIN_ENGINE_POWER = 100.0 # 13.0 26 | SIDE_ENGINE_POWER = 3.0 # 0.6 27 | 28 | INITIAL_RANDOM = 500 # Set 1500 to make game harder 29 | 30 | FALCON_POLY =[ 31 | (-4,+150), (-13,135),(-13,0), (-13,-30), 32 | (+13,-30),(+13,0),(+13,+135), (+4,+150) 33 | ] 34 | 35 | LEG_AWAY = 12 #20 36 | LEG_DOWN = 30 # 18 37 | LEG_W, LEG_H = 3, 10 38 | LEG_SPRING_TORQUE = 40 39 | 40 | SIDE_ENGINE_HEIGHT = 130.0 #14 41 | SIDE_ENGINE_AWAY = 15.0 #12 42 | 43 | DRONE_SHIP_H = 0.25 44 | DRONE_SHIP_W = 2.5 45 | SEA_LEVEL = 30/SCALE 46 | GOING_LEFT = False 47 | CONST_FORCE_DRONE_SHIP = 0.75 48 | FREQUENCY_FACTOR = 250 49 | 50 | VIEWPORT_W = 1450 51 | VIEWPORT_H = 820 52 | 53 | 54 | class ContactDetector(contactListener): 55 | def __init__(self, env): 56 | contactListener.__init__(self) 57 | self.env = env 58 | def BeginContact(self, contact): 59 | if self.env.falcon_rocket==contact.fixtureA.body or self.env.falcon_rocket==contact.fixtureB.body: 60 | self.env.game_over = True 61 | for i in range(2): 62 | if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]: 63 | self.env.legs[i].ground_contact = True 64 | def EndContact(self, contact): 65 | for i in range(2): 66 | if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]: 67 | self.env.legs[i].ground_contact = False 68 | 69 | class FalconLander(gym.Env): 70 | metadata = { 71 | 'render.modes': ['human', 'rgb_array'], 72 | 'video.frames_per_second' : FPS 73 | } 74 | 75 | continuous = False 76 | 77 | def __init__(self): 78 | pygame.init() 79 | self._seed() 80 | self.viewer = None 81 | 82 | self.world = Box2D.b2World() 83 | self.sea_surface = None 84 | self.falcon_rocket = None 85 | self.floating_drone_ship = None 86 | self.particles = [] 87 | 88 | self.prev_reward = None 89 | 90 | high = np.array([np.inf]*8) # useful range is -1 .. +1, but spikes can be higher 91 | self.observation_space = spaces.Box(-high, high) 92 | 93 | if self.continuous: 94 | # Action is two floats [main engine, left-right engines]. 95 | # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power. 
96 | # Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off 97 | self.action_space = spaces.Box(-1, +1, (2,)) 98 | else: 99 | # Nop, fire left engine, main engine, right engine 100 | self.action_space = spaces.Discrete(4) 101 | 102 | self._reset() 103 | 104 | 105 | def _seed(self, seed=None): 106 | self.np_random, seed = seeding.np_random(seed) 107 | return [seed] 108 | 109 | 110 | def _destroy(self): 111 | if not self.sea_surface: return 112 | self.world.contactListener = None 113 | self._clean_particles(True) 114 | self.world.DestroyBody(self.sea_surface) 115 | self.sea_surface = None 116 | 117 | self.world.DestroyBody(self.floating_drone_ship) 118 | self.floating_drone_ship = None 119 | 120 | self.world.DestroyBody(self.falcon_rocket) 121 | self.falcon_rocket = None 122 | self.world.DestroyBody(self.legs[0]) 123 | self.world.DestroyBody(self.legs[1]) 124 | 125 | 126 | def _reset(self): 127 | self._destroy() 128 | self.world.contactListener_keepref = ContactDetector(self) 129 | self.world.contactListener = self.world.contactListener_keepref 130 | self.game_over = False 131 | self.prev_shaping = None 132 | 133 | W = VIEWPORT_W/SCALE 134 | H = VIEWPORT_H/SCALE 135 | 136 | # create the sea surface into which the flacon might draw (with no collision) 137 | f = fixtureDef( 138 | shape=polygonShape(box=(W,SEA_LEVEL), radius=0.0), 139 | density=0.5, 140 | friction=0.03) 141 | 142 | self.sea_surface = self.world.CreateStaticBody( 143 | position=(0, SEA_LEVEL), angle=0, 144 | fixtures = f) 145 | 146 | # Create the floating drone ship 147 | f = fixtureDef( 148 | shape=polygonShape(box=(DRONE_SHIP_W,DRONE_SHIP_H), 149 | radius=0.0), density=0.4, friction=0.05, 150 | categoryBits=0x0020, 151 | maskBits=0x001) #, userData=self.logo_img) 152 | 153 | self.floating_drone_ship = self.world.CreateDynamicBody( 154 | position=(DRONE_SHIP_W/SCALE, SEA_LEVEL), angle=0, linearDamping = 0.7, angularDamping = 0.3, 155 | fixtures = f) 156 | 157 | self.floating_drone_ship.color1 = (0.1,0.1,0.1) 158 | self.floating_drone_ship.color2 = (0,0,0) 159 | 160 | self.sea_surface.color1 = (0.5,0.4,0.9) 161 | self.sea_surface.color2 = (0.3,0.3,0.5) 162 | 163 | 164 | initial_y = VIEWPORT_H/SCALE 165 | # create the falcon lander 166 | self.falcon_rocket = self.world.CreateDynamicBody( 167 | position = (VIEWPORT_W/SCALE/2, initial_y), 168 | angle=0.0, 169 | fixtures = fixtureDef( 170 | shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in FALCON_POLY ]), 171 | density=5.0, # 5.0 172 | friction=0.1, 173 | categoryBits=0x001, 174 | maskBits=0x0020, # collide only with floating_drone_ship 175 | restitution=0.0) #, userData=self.logo_img) # 0.99 bouncy 176 | ) 177 | self.falcon_rocket.color1 = (1.0,1.0,1.0) 178 | self.falcon_rocket.color2 = (0,0,0) 179 | 180 | self.falcon_rocket.ApplyForceToCenter( ( 181 | self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 182 | self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM) 183 | ), True) 184 | 185 | # create the legs of the falcon rocket 186 | self.legs = [] 187 | for i in [-1,+1]: 188 | leg = self.world.CreateDynamicBody( 189 | position = (VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y), 190 | angle = (i*0.05), 191 | fixtures = fixtureDef( 192 | shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)), 193 | density=1.0, 194 | restitution=0.0, 195 | categoryBits=0x001, 196 | maskBits=0x0020) 197 | ) 198 | leg.ground_contact = False 199 | leg.color1 = (0,0,0) 200 | leg.color2 = (0,0,0) 201 | rjd = revoluteJointDef( 202 | bodyA=self.falcon_rocket, 
203 | bodyB=leg, 204 | localAnchorA=(0, 0), 205 | localAnchorB=(i*LEG_AWAY/SCALE, LEG_DOWN/SCALE), 206 | enableMotor=True, 207 | enableLimit=True, 208 | maxMotorTorque=LEG_SPRING_TORQUE, 209 | motorSpeed=+0.3*i # low enough not to jump back into the sky 210 | ) 211 | if i==-1: 212 | rjd.lowerAngle = +0.9 - 0.5 # Yes, the most esoteric numbers here, angles legs have freedom to travel within 213 | rjd.upperAngle = +0.9 214 | else: 215 | rjd.lowerAngle = -0.9 216 | rjd.upperAngle = -0.9 + 0.5 217 | leg.joint = self.world.CreateJoint(rjd) 218 | self.legs.append(leg) 219 | 220 | self.drawlist = [self.falcon_rocket] + self.legs + [self.floating_drone_ship] + [self.sea_surface] 221 | 222 | return self._step(np.array([0,0]) if self.continuous else 0)[0] 223 | 224 | 225 | def _create_particle(self, mass, x, y, ttl): 226 | # create those butifull particles that bubble off when the force is applied 227 | p = self.world.CreateDynamicBody( 228 | position = (x,y), 229 | angle=0.0, 230 | fixtures = fixtureDef( 231 | shape=circleShape(radius=2/SCALE, pos=(0,0)), 232 | density=mass, 233 | friction=0.1, 234 | categoryBits=0x0100, 235 | maskBits=0x001, # collide only with ground 236 | restitution=0.3) 237 | ) 238 | p.ttl = ttl 239 | self.particles.append(p) 240 | self._clean_particles(False) 241 | return p 242 | 243 | def _clean_particles(self, all): 244 | while self.particles and (all or self.particles[0].ttl<0): 245 | self.world.DestroyBody(self.particles.pop(0)) 246 | 247 | 248 | def control_floating_platform(self): 249 | '''It's controlled autonomously so the learner has no direct access 250 | ''' 251 | 252 | global GOING_LEFT 253 | fx = 1 254 | fy = 0 255 | p1 = self.floating_drone_ship.GetWorldPoint(localPoint=(3.25, 5.5)) 256 | p2 = self.floating_drone_ship.GetWorldPoint(localPoint=(3.75, 7.5)) 257 | width, fy = p1[0], p1[1] 258 | 259 | new_y_p1 = 2.5*math.cos(FREQUENCY_FACTOR) 260 | new_y_p2 = 2.5*math.sin(FREQUENCY_FACTOR) 261 | 262 | if not GOING_LEFT: 263 | self.floating_drone_ship.ApplyForce(force=(CONST_FORCE_DRONE_SHIP*fx,CONST_FORCE_DRONE_SHIP*new_y_p2), point=p2, wake=True) 264 | self.floating_drone_ship.ApplyForce(force=(CONST_FORCE_DRONE_SHIP/2*fx,CONST_FORCE_DRONE_SHIP/2*new_y_p1), point=p1, wake=True) 265 | if width > (VIEWPORT_W/SCALE*0.7): 266 | GOING_LEFT = True 267 | else: 268 | self.floating_drone_ship.ApplyForce(force=((-1)*CONST_FORCE_DRONE_SHIP*fx,-CONST_FORCE_DRONE_SHIP*new_y_p1), point=p1, wake=True) 269 | self.floating_drone_ship.ApplyForce(force=((-1)*CONST_FORCE_DRONE_SHIP/2*fx,-CONST_FORCE_DRONE_SHIP/2*new_y_p2), point=p2, wake=True) 270 | if width <= (VIEWPORT_W/SCALE*0.3): 271 | GOING_LEFT = False 272 | 273 | 274 | def _step(self, action): 275 | # act on the floating platform, it's autonomous 276 | self.control_floating_platform() 277 | 278 | # Engines 279 | tip = (math.sin(self.falcon_rocket.angle), math.cos(self.falcon_rocket.angle)) 280 | side = (-tip[1], tip[0]); 281 | dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)] 282 | 283 | m_power = 0.0 284 | if (self.continuous and action[0] > 0.0) or (not self.continuous and action==2): 285 | # Main engine 286 | if self.continuous: 287 | m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5 # 0.5..1.0 288 | assert m_power>=0.5 and m_power <= 1.0 289 | else: 290 | m_power = 1.0 291 | orient_x = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1] # 4 is move a bit downwards, +-2 for randomness 292 | orient_y = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1] 293 | impulse_pos = 
(self.falcon_rocket.position[0] + orient_x, self.falcon_rocket.position[1] + orient_y) 294 | p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1], m_power) # particles are just a decoration, 3.5 is here to make particle speed adequate 295 | p.ApplyLinearImpulse( ( orient_x*MAIN_ENGINE_POWER*m_power, orient_y*MAIN_ENGINE_POWER*m_power), impulse_pos, True) 296 | self.falcon_rocket.ApplyLinearImpulse( (-orient_x*MAIN_ENGINE_POWER*m_power, -orient_y*MAIN_ENGINE_POWER*m_power), impulse_pos, True) 297 | 298 | s_power = 0.0 299 | if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1,3]): 300 | # Orientation engines 301 | if self.continuous: 302 | direction = np.sign(action[1]) 303 | s_power = np.clip(np.abs(action[1]), 0.5,1.0) 304 | assert s_power>=0.5 and s_power <= 1.0 305 | else: 306 | direction = action-2 307 | s_power = 1.0 308 | orient_x = tip[0]*dispersion[0] + side[0]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE) 309 | orient_y = -tip[1]*dispersion[0] - side[1]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE) 310 | impulse_pos = (self.falcon_rocket.position[0] + orient_x - tip[0]*17/SCALE, self.falcon_rocket.position[1] + orient_y + tip[1]*SIDE_ENGINE_HEIGHT/SCALE) 311 | p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power) 312 | p.ApplyLinearImpulse( ( orient_x*SIDE_ENGINE_POWER*s_power, orient_y*SIDE_ENGINE_POWER*s_power), impulse_pos, True) 313 | self.falcon_rocket.ApplyLinearImpulse( (-orient_x*SIDE_ENGINE_POWER*s_power, -orient_y*SIDE_ENGINE_POWER*s_power), impulse_pos, True) 314 | 315 | self.world.Step(1.0/FPS, 6*30, 2*30) 316 | 317 | pos = self.falcon_rocket.position 318 | vel = self.falcon_rocket.linearVelocity 319 | 320 | pos_floating_drone_ship = self.floating_drone_ship.position 321 | vel_floating_drone_ship = self.floating_drone_ship.linearVelocity 322 | 323 | state = [ 324 | (pos.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2), 325 | (pos.y - (DRONE_SHIP_H + LEG_DOWN/SCALE)) / (VIEWPORT_H/SCALE/2), 326 | vel.x*(VIEWPORT_W/SCALE/2)/FPS, 327 | vel.y*(VIEWPORT_H/SCALE/2)/FPS, 328 | self.falcon_rocket.angle, 329 | 20.0*self.falcon_rocket.angularVelocity, 330 | 1.0 if (self.legs[0].ground_contact and self.legs[1].ground_contact) else 0.0, 331 | 1.0 if (self.legs[0].ground_contact and self.legs[1].ground_contact) else 0.0, 332 | (pos_floating_drone_ship.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2), 333 | pos_floating_drone_ship.y / (VIEWPORT_H/SCALE/2), 334 | vel_floating_drone_ship.x*(VIEWPORT_W/SCALE/2)/FPS, 335 | vel_floating_drone_ship.y*(VIEWPORT_H/SCALE/2)/FPS, 336 | self.floating_drone_ship.angle 337 | ] 338 | 339 | assert len(state)==13 340 | 341 | reward = 0 342 | 343 | shaping = \ 344 | - 100*np.sqrt( (state[0] - state[8])**2 + (state[1] - state[9])**2) \ 345 | - 100*np.sqrt(state[2]*state[2] + state[3]*state[3]) \ 346 | - 100*abs(state[4]) + 10*state[6] + 10*state[7] \ 347 | - 10*abs(state[8]) - 10*abs(state[10]) - 10*abs(state[11]) 348 | # in the last line above: lower values of x, dx and angle of drone ship => higher reward 349 | 350 | if self.prev_shaping is not None: 351 | reward = shaping - self.prev_shaping 352 | self.prev_shaping = shaping 353 | 354 | reward -= m_power*0.15 # In the floating ship version, the penalty should be smaller 355 | reward -= s_power*0.05 356 | 357 | done = False 358 | 359 | DRONE_LEVEL = state[9] 360 | if self.game_over or state[1] < DRONE_LEVEL: 361 | done = True 362 | reward = -150 363 | 364 | if not self.falcon_rocket.awake and (state[6] == 1 and state[7] == 1): 
365 | done = True 366 | reward = +150 367 | 368 | elif not self.falcon_rocket.awake and (state[6] != 1 and state[7] != 1):#if not landed on both legs, penalize 369 | done = True 370 | reward = -250 371 | 372 | return np.array(state), reward, done, {} 373 | 374 | 375 | def _render(self, mode='human', close=False): 376 | if close: 377 | if self.viewer is not None: 378 | self.viewer.close() 379 | self.viewer = None 380 | return 381 | 382 | from gym.envs.classic_control import rendering 383 | if self.viewer is None: 384 | self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H) 385 | self.viewer.set_bounds(0, VIEWPORT_W/SCALE, 0, VIEWPORT_H/SCALE) 386 | 387 | for obj in self.particles: 388 | obj.ttl -= 0.15 389 | obj.color1 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl)) 390 | obj.color2 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl)) 391 | 392 | self._clean_particles(False) 393 | 394 | for obj in self.particles + self.drawlist: 395 | for f in obj.fixtures: 396 | trans = f.body.transform 397 | if type(f.shape) is circleShape: 398 | t = rendering.Transform(translation=trans*f.shape.pos) 399 | self.viewer.draw_circle(f.shape.radius, 20, color=obj.color1).add_attr(t) 400 | self.viewer.draw_circle(f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2).add_attr(t) 401 | else: 402 | path = [trans*v for v in f.shape.vertices] 403 | self.viewer.draw_polygon(path, color=obj.color1) 404 | path.append(path[0]) 405 | self.viewer.draw_polyline(path, color=obj.color2, linewidth=2) 406 | 407 | return self.viewer.render(return_rgb_array = mode=='rgb_array') 408 | 409 | 410 | class FalconLanderContinuous(FalconLander): 411 | continuous = True 412 | 413 | def heuristic(env, s): 414 | # Heuristic for: 415 | # 1. Testing. 416 | # 2. Demonstration rollout. 417 | 418 | #PID to get closer to the floating drone ship 419 | cross_err = math.sqrt( (s[0] - (s[8]))**2 + (s[1] - s[9])**2 ) 420 | #print 'cross_error: ', cross_err 421 | angle_targ = (s[0]-s[8])*0.5 + s[2]*0.85 # angle should point towards center (s[0] is horizontal coordinate, s[2] horiz. speed) 422 | if angle_targ > 0.3: 423 | angle_targ = 0.3 # more than 0.4 radians (22 degrees) is bad 424 | if angle_targ < -0.3: 425 | angle_targ = -0.3 426 | 427 | # PID controller: s[4] angle, s[5] angularSpeed 428 | angle_todo = (angle_targ - s[4])*0.55 - (s[5])*0.25 429 | #print("angle_targ=%0.2f, angle_todo=%0.2f" % (angle_targ, angle_todo)) 430 | 431 | hover_targ = 0.25*(cross_err) 432 | # PID controller: s[1] vertical coordinate s[3] vertical speed 433 | hover_todo = (hover_targ/s[1] - s[1])*0.6 - (s[3])*0.4 434 | #print("hover_targ=%0.2f, hover_todo=%0.2f" % (hover_targ, hover_todo)) 435 | 436 | if s[6] or s[7]: # both legs have contact 437 | angle_todo = 0 438 | hover_todo = -(s[3])*0.5 # override to reduce fall speed, that's all we need after contact 439 | 440 | if env.continuous: 441 | a = np.array( [hover_todo*30 - 1, -angle_todo*30] ) 442 | a = np.clip(a, -1, +1) 443 | 444 | return a 445 | 446 | 447 | 448 | def key_control(env, s): 449 | action_done = False 450 | action = [0, 0] 451 | #print type(env.logo_img) 452 | #print env.logo_img 453 | # Action is two floats [main engine, left-right engines]. 454 | # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power. 
455 | # Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off 456 | for event in pygame.event.get(): 457 | if event.type == pygame.QUIT: 458 | action_done = True 459 | break 460 | if event.key == K_RIGHT: 461 | action[0] = 0.0 462 | action[1] = 1.0 463 | action_done = True 464 | 465 | elif event.key == K_LEFT: 466 | action[0] = 0.0 467 | action[1] = -1.0 468 | action_done = True 469 | 470 | elif event.type == KEYUP: 471 | action[0] = 1 472 | action[1] = 0 473 | action_done = True 474 | 475 | return np.array(action) 476 | 477 | 478 | if __name__=="__main__": 479 | #env = FalconLander() 480 | env = FalconLanderContinuous() 481 | s = env.reset() 482 | total_reward = 0 483 | steps = 0 484 | while True: 485 | a = heuristic(env, s) 486 | #a = key_control(env, s) 487 | s, r, done, info = env.step(a) 488 | env.render() 489 | total_reward += r 490 | if steps % 20 == 0 or done: 491 | print(["{:+0.2f}".format(x) for x in s]) 492 | print("step {} total_reward {:+0.2f}".format(steps, total_reward)) 493 | steps += 1 494 | if done: break 495 | -------------------------------------------------------------------------------- /gym_extensions/continuous/gym_navigation_2d/__init__.py: -------------------------------------------------------------------------------- 1 | from .range_based_navigation import LimitedRangeBasedPOMDPNavigation2DEnv, StateBasedMDPNavigation2DEnv 2 | from .image_based_navigation import ImageBasedNavigation2DEnv 3 | # 4 | from gym.envs import register 5 | import numpy as np 6 | 7 | # (x,y) 8 | idx_to_goal = [ (np.array([230.0, 430.0]), np.array([230.0, 130.0]), np.array([500.0, 430.0]) )] * 10 9 | idx_to_goal[1] = (np.array([130.0, 370.0]), np.array([130.0, 110.0]), np.array([520.0, 250.0]) ) 10 | idx_to_goal[2] = (np.array([530.0, 110.0]), np.array([130.0, 310.0]), np.array([460.0, 330.0]) ) 11 | idx_to_goal[3] = (np.array([400.0, 50.0]), np.array([180.0, 320.0]), np.array([430.0, 310.0]) ) 12 | idx_to_goal[4] = (np.array([180.0, 380.0]), np.array([610.0, 120.0]), np.array([420.0, 330.0]) ) 13 | idx_to_goal[5] = (np.array([500.0, 90.0]), np.array([180.0, 390.0]), np.array([380.0, 320.0]) ) 14 | idx_to_goal[6] = (np.array([480.0, 150.0]), np.array([440.0, 380.0]), np.array([310.0, 220.0]) ) 15 | idx_to_goal[7] = (np.array([500.0, 380.0]), np.array([470.0, 280.0]), np.array([270.0, 280.0]) ) 16 | idx_to_goal[8] = (np.array([250.0, 440.0]), np.array([420.0, 200.0]), np.array([150.0, 180.0]) ) 17 | idx_to_goal[9] = (np.array([390.0, 110.0]), np.array([520.0, 350.0]), np.array([240.0, 310.0]) ) 18 | 19 | custom_envs = {} 20 | for i in range(10): 21 | for j in range(3): 22 | # add each env to dictionary 23 | custom_envs['State-Based-Navigation-2d-Map%d-Goal%d-v0' % (i, j)] = dict( 24 | path='gym_extensions.continuous.gym_navigation_2d:StateBasedMDPNavigation2DEnv', 25 | max_episode_steps=1000, 26 | kwargs=dict(world_idx=i, destination=idx_to_goal[i][j])) 27 | 28 | custom_envs['State-Based-Navigation-2d-Map%d-Goal%d-KnownGoalPosition-v0' % (i, j)] = dict( 29 | path='gym_extensions.continuous.gym_navigation_2d:StateBasedMDPNavigation2DEnv', 30 | max_episode_steps=1000, 31 | kwargs=dict(world_idx=i, destination = idx_to_goal[i][j])) 32 | 33 | custom_envs['State-Based-Navigation-2d-Map%d-Goal%d-KnownGoalPosition-v0' % (i, j)] = dict( 34 | path='gym_extensions.continuous.gym_navigation_2d:StateBasedMDPNavigation2DEnv', 35 | max_episode_steps=1000, 36 | kwargs=dict(world_idx=i, destination = idx_to_goal[i][j], 37 | add_self_position_to_observation=True, 38 | 
add_goal_position_to_observation=True)) 39 | 40 | custom_envs['Limited-Range-Based-Navigation-2d-Map%d-Goal%d-v0' % (i, j)] = dict( 41 | path='gym_extensions.continuous.gym_navigation_2d:LimitedRangeBasedPOMDPNavigation2DEnv', 42 | max_episode_steps=1000, 43 | kwargs=dict(world_idx=i, destination = idx_to_goal[i][j])) 44 | 45 | custom_envs['Limited-Range-Based-Navigation-2d-Map%d-Goal%d-KnownPositions-v0' % (i, j)] = dict( 46 | path='gym_extensions.continuous.gym_navigation_2d:LimitedRangeBasedPOMDPNavigation2DEnv', 47 | max_episode_steps=1000, 48 | kwargs=dict(world_idx=i, destination = idx_to_goal[i][j], 49 | add_self_position_to_observation=True, 50 | add_goal_position_to_observation=True)) 51 | 52 | custom_envs['Image-Based-Navigation-2d-Map%d-Goal%d-v0' % (i, j)] = dict( 53 | path='gym_extensions.continuous.gym_navigation_2d:ImageBasedNavigation2DEnv', 54 | max_episode_steps=1000, 55 | kwargs=dict(world_idx=i, destination = idx_to_goal[i][j])) 56 | 57 | # register each env into 58 | def register_custom_envs(): 59 | for key, value in custom_envs.items(): 60 | arg_dict = dict(id=key, 61 | entry_point=value['path'], 62 | max_episode_steps=value['max_episode_steps'], 63 | kwargs=value['kwargs']) 64 | if 'reward_threshold' in value.keys(): 65 | arg_dict['reward_threshold'] = value['reward_threshold'] 66 | register(**arg_dict) 67 | 68 | register_custom_envs() 69 | -------------------------------------------------------------------------------- /gym_extensions/continuous/gym_navigation_2d/env_generator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import scipy 4 | from scipy import stats 5 | import numpy as np 6 | from math import sqrt, asin, cos, sin, atan2 7 | import networkx as nx 8 | from .env_utils import Obstacle, Environment 9 | from .geometry_utils import * 10 | import sys 11 | import pickle 12 | 13 | 14 | class EnvironmentGenerator(object): 15 | 16 | def __init__(self, x_range, y_range, width_range, height_range): 17 | self.x_range = x_range 18 | self.y_range = y_range 19 | self.width_range = width_range 20 | self.height_range = height_range 21 | 22 | def sample_spatial_poisson_process(self, rate): 23 | xmin, xmax = self.x_range 24 | ymin, ymax = self.y_range 25 | 26 | dx = xmax - xmin 27 | dy = ymax - ymin 28 | 29 | N = stats.poisson( rate * dx * dy ).rvs() 30 | x = stats.uniform.rvs(xmin, dx, ((N, 1)) ) 31 | y = stats.uniform.rvs(ymin, dy, ((N, 1)) ) 32 | 33 | centers = np.hstack((x,y)) 34 | return centers 35 | 36 | def sample_axis_aligned_rectangles(self, density): 37 | wmin, wmax = self.width_range 38 | hmin, hmax = self.height_range 39 | 40 | dw = wmax - wmin 41 | dh = hmax - hmin 42 | 43 | centers = self.sample_spatial_poisson_process(rate=density) 44 | widths = stats.uniform.rvs(wmin, dw, ((centers.shape[0], 1)) ) 45 | heights = stats.uniform.rvs(hmin, dh, ((centers.shape[0], 1)) ) 46 | 47 | # Only keep obstacels that are fully within allowed range 48 | # This is so that pixel occupancy queries are the same as 49 | # continuous distance queries, if they are ever used 50 | x_within_bounds = centers[:, 0] + widths[:, 0]/2.0 <= x_range[1] 51 | x_within_bounds = x_within_bounds * (centers[:, 0] - widths[:, 0]/2.0 >= x_range[0]) 52 | 53 | y_within_bounds = centers[:, 1] + heights[:, 0]/2.0 <= y_range[1] 54 | y_within_bounds = y_within_bounds * (centers[:, 1] - heights[:, 0]/2.0 >= y_range[0]) 55 | 56 | valid_idx = x_within_bounds * y_within_bounds 57 | return centers[valid_idx, :], widths[valid_idx, :], 
heights[valid_idx, :] 58 | 59 | def merge_rectangles_into_obstacles(self, centers, widths, heights, epsilon): 60 | """Merges rectangles defined by centers, widths, heights. Two rectangles 61 | with distance < epsilon are considered part of the same object.""" 62 | 63 | G = nx.Graph() 64 | obstacles = {i: Obstacle(centers[i, :], widths[i, 0], heights[i, 0]) for i in range(len(centers))} 65 | G.add_nodes_from(obstacles.keys()) 66 | 67 | for i in obstacles: 68 | for j in obstacles: 69 | if i != j and obstacles[i].distance_to_obstacle(obstacles[j]) < epsilon: 70 | G.add_edge(i,j) 71 | 72 | merged_obstacles = {} 73 | conn_components = nx.connected_components(G) 74 | for cc in conn_components: 75 | cc = list(cc) 76 | new_obs = obstacles[cc[0]] 77 | for i in range(1, len(cc)): 78 | new_obs.merge(obstacles[cc[i]]) 79 | 80 | merged_obstacles[cc[0]] = new_obs 81 | 82 | return merged_obstacles 83 | 84 | 85 | class EnvironmentCollection(object): 86 | 87 | def __init__(self): 88 | self.x_range = [] 89 | self.y_range = [] 90 | self.width_range = [] 91 | self.height_range = [] 92 | self.num_environments = 0 93 | self.map_collection = {} 94 | 95 | def generate_random(self, x_range, y_range, width_range, height_range, density, num_environments): 96 | self.x_range = x_range 97 | self.y_range = y_range 98 | self.width_range = width_range 99 | self.height_range = height_range 100 | self.num_environments = num_environments 101 | self.map_collection = {} 102 | 103 | eg = EnvironmentGenerator(x_range, y_range, width_range, height_range) 104 | for i in range(self.num_environments): 105 | print('Sampling environment', i) 106 | centers, widths, heights = eg.sample_axis_aligned_rectangles(density) 107 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2) 108 | self.map_collection[i] = Environment(self.x_range, self.y_range, list(obstacles.values())) 109 | 110 | def read(self, pkl_filename): 111 | file_object = open(pkl_filename, 'rb') 112 | self.x_range, self.y_range, worlds_without_classes = pickle.load(file_object, encoding='bytes') 113 | self.map_collection = {idx: Environment(val[0], val[1], [Obstacle(c,w,h) for c,w,h in val[2]]) for idx, val in worlds_without_classes.items()} 114 | file_object.close() 115 | 116 | def save(self, pkl_filename): 117 | file_object = open(pkl_filename, 'wb') 118 | worlds_without_classes = { idx : (world.x_range, 119 | world.y_range, 120 | [(obs.rectangle_centers, obs.rectangle_widths, obs.rectangle_heights) for obs in world.obstacles]) 121 | 122 | for idx, world in self.map_collection.items()} 123 | 124 | pickle.dump((self.x_range, self.y_range, worlds_without_classes), file_object) 125 | file_object.close() 126 | 127 | 128 | if __name__ == "__main__": 129 | import argparse 130 | parser = argparse.ArgumentParser() 131 | parser.add_argument("filepath_to_save", help="The pickle filepath (i.e. 
assets/world_640x480_v0.pkl)") 132 | parser.add_argument("--num_environments", default=10, type=int) 133 | args = parser.parse_args() 134 | 135 | x_range=[0, 640] 136 | y_range=[0, 480] 137 | width_range=[10, 30] 138 | height_range=[10,50] 139 | 140 | density = 0.0003 141 | 142 | ec = EnvironmentCollection() 143 | ec.generate_random(x_range, y_range, width_range, height_range, density, args.num_environments) 144 | ec.save(args.filepath_to_save) 145 | -------------------------------------------------------------------------------- /gym_extensions/continuous/gym_navigation_2d/env_utils.py: -------------------------------------------------------------------------------- 1 | import scipy 2 | from scipy import stats 3 | import numpy as np 4 | from math import sqrt, asin, cos, sin, atan2, ceil 5 | import networkx as nx 6 | import sys 7 | import pickle 8 | 9 | from .geometry_utils import * 10 | 11 | class Obstacle(object): 12 | def __init__(self, c, w, h): 13 | 14 | if type(c) == type([]): 15 | self.rectangle_centers = c 16 | self.rectangle_widths = w 17 | self.rectangle_heights = h 18 | 19 | assert(c) 20 | 21 | self.lowest_point = np.array([ c[0][0], c[0][1] - h[0]/2.0]) 22 | self.representative_point = c[0].copy() 23 | 24 | for ca, wa, ha in zip(c, w, h): 25 | if (self.lowest_point[1] > ca[1] - ha/2.0): 26 | self.lowest_point = np.array([ca[0], ca[1] - ha/2.0]) 27 | 28 | else: 29 | self.rectangle_centers = [c] 30 | self.rectangle_widths = [w] 31 | self.rectangle_heights = [h] 32 | self.lowest_point = np.array([c[0], c[1] - h/2.0]) 33 | self.representative_point = c.copy() 34 | 35 | def __eq__(self, other): 36 | return all([ (sc == oc).all() for sc, oc in zip(self.rectangle_centers, other.rectangle_centers) ]) and \ 37 | all([ (sw == ow).all() for sw, ow in zip(self.rectangle_widths, other.rectangle_widths) ]) and \ 38 | all([ (sh == oh).all() for sh, oh in zip(self.rectangle_heights, other.rectangle_heights) ]) 39 | 40 | def append(self, ca, wa, ha): 41 | self.rectangle_centers.append(ca) 42 | self.rectangle_widths.append(wa) 43 | self.rectangle_heights.append(ha) 44 | 45 | if (self.lowest_point[1] > ca[1] - ha/2.0): 46 | self.lowest_point = np.array([ca[0], ca[1] - ha/2.0]) 47 | 48 | def merge(self, obs): 49 | self.rectangle_centers.extend(obs.rectangle_centers) 50 | self.rectangle_widths.extend(obs.rectangle_widths) 51 | self.rectangle_heights.extend(obs.rectangle_heights) 52 | 53 | for ca, wa, ha in zip(obs.rectangle_centers, obs.rectangle_widths, obs.rectangle_heights): 54 | if (self.lowest_point[1] > ca[1] - ha/2.0): 55 | self.lowest_point = np.array([ca[0], ca[1] - ha/2.0]) 56 | 57 | def distance_to_point(self, x, y): 58 | p = np.array([x,y]) 59 | dist = [point_to_rectangle_distance(p, ca, wa, ha) for ca,wa,ha in zip(self.rectangle_centers, self.rectangle_widths, self.rectangle_heights)] 60 | return min(dist) 61 | 62 | def distance_to_rectangle(self, ca, wa, ha): 63 | dist = [rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb) for cb,wb,hb in zip(self.rectangle_centers, self.rectangle_widths, self.rectangle_heights)] 64 | return min(dist) 65 | 66 | def distance_to_obstacle(self, obs): 67 | dist = [self.distance_to_rectangle(ca, wa, ha) for ca,wa,ha in zip(obs.rectangle_centers, obs.rectangle_widths, obs.rectangle_heights)] 68 | return min(dist) 69 | 70 | def closest_point_to(self, p): 71 | closest_points_to_segments = [closest_point_on_segment(p, s, t) for c,w,h in zip(self.rectangle_centers, self.rectangle_widths, self.rectangle_heights) \ 72 | for s,t in rectangle_edges( 
np.array([c[0] + w/2.0, c[1] + h/2.0]), \ 73 | np.array([c[0] + w/2.0, c[1] - h/2.0]), \ 74 | np.array([c[0] - w/2.0, c[1] - h/2.0]), \ 75 | np.array([c[0] - w/2.0, c[1] + h/2.0]) )] 76 | 77 | distances = [np.linalg.norm(p - cp) for cp in closest_points_to_segments] 78 | idx = np.argmin(distances) 79 | 80 | return closest_points_to_segments[idx] 81 | 82 | class Environment(object): 83 | def __init__(self, x_range, y_range, obstacles): 84 | self.obstacles = obstacles 85 | self.x_range = x_range 86 | self.y_range = y_range 87 | 88 | w = x_range[1] - x_range[0] 89 | h = y_range[1] - y_range[0] 90 | 91 | self.image = 255*np.ones((h, w, 3), dtype='uint8') 92 | for obs in self.obstacles: 93 | for co, wo, ho in zip(obs.rectangle_centers, obs.rectangle_widths, obs.rectangle_heights): 94 | r = co[1] 95 | c = co[0] 96 | min_row = max(int(r - ho/2.0), 0) 97 | max_row = min(int(r + ho/2.0), h-1) 98 | 99 | min_col = max(int(c - wo/2.0), 0) 100 | max_col = min(int(c + wo/2.0), w-1) 101 | 102 | self.image[min_row:max_row, min_col:max_col, :] = (204, 153, 102) 103 | 104 | 105 | def __eq__(self, other): 106 | return self.obstacles == other.obstacles and self.x_range == other.x_range and self.y_range == other.y_range 107 | 108 | def point_distance_from_obstacles(self, x, y): 109 | dist = [obs.distance_to_point(x, y) for obs in self.obstacles] 110 | return min(dist) 111 | 112 | 113 | def point_is_in_free_space(self, x, y, epsilon=0.25): 114 | row = int(y) 115 | col = int(x) 116 | 117 | if (row >=0 and row < self.image.shape[0] and col >= 0 and col < self.image.shape[1]): 118 | return (self.image[row, col, :] == (255, 255, 255)).all() 119 | else: 120 | return True 121 | 122 | 123 | def range_and_bearing_to_closest_obstacle(self, x,y): 124 | dist = [(self.obstacles[i].distance_to_point(x, y), i) for i in range(len(self.obstacles))] 125 | distance_to_closest_obstacle, idx_closest = min(dist) 126 | closest_obstacle = self.obstacles[idx_closest] 127 | cp = closest_obstacle.closest_point_to(np.array([x,y])) 128 | bearing_to_closest_obstacle = atan2(cp[1]-y, cp[0]-x) 129 | return distance_to_closest_obstacle, bearing_to_closest_obstacle 130 | 131 | 132 | def segment_is_in_free_space(self, x1,y1, x2,y2, epsilon=0.5): 133 | # Note: this is assuming that 1px = 1m 134 | a = np.array([x1,y1]) 135 | b = np.array([x2,y2]) 136 | L = np.linalg.norm(b-a) 137 | return all([self.point_is_in_free_space(a[0] + i/L*(b[0]-a[0]), a[1] + i/L*(b[1]-a[1])) for i in range(ceil(L))]) 138 | 139 | 140 | def segment_distance_from_obstacles(self, x1, y1, x2, y2): 141 | 142 | if not self.segment_is_in_free_space(x1, y1, x2, y2, epsilon=1e-10): 143 | return 0.0 144 | 145 | a = np.array([x1, y1]) 146 | b = np.array([x2, y2]) 147 | 148 | dist = [point_to_segment_distance(p, a, b) for obs in self.obstacles \ 149 | for c,w,h in zip(obs.rectangle_centers, obs.rectangle_widths, obs.rectangle_heights) for p in rectangle_vertices(c,w,h)] 150 | 151 | return min(dist) 152 | 153 | def raytrace(self, p, theta, max_range, n_evals=50): 154 | """TODO: implement a less naive collision algo than this""" 155 | ct = cos(theta) 156 | st = sin(theta) 157 | direction = np.array([ct, st]) 158 | 159 | a = p 160 | b = p + max_range * direction 161 | 162 | if self.segment_is_in_free_space(a[0], a[1], b[0], b[1], epsilon=1e-10): 163 | return -1.0 164 | 165 | last_free_dist = 0 166 | for e in range(n_evals): 167 | dist = e/float(n_evals) * max_range 168 | c = a + dist * direction 169 | if not self.point_is_in_free_space(c[0], c[1], epsilon=1e-10): 170 | return 
last_free_dist 171 | 172 | last_free_dist = dist 173 | return max_range 174 | 175 | def winding_angle(self, path, point): 176 | wa = 0 177 | for i in range(len(path)-1): 178 | p = np.array([path[i].x, path[i].y]) 179 | pn = np.array([path[i+1].x, path[i+1].y]) 180 | 181 | vp = p - point 182 | vpn = pn - point 183 | 184 | vp_norm = sqrt(vp[0]**2 + vp[1]**2) 185 | vpn_norm = sqrt(vpn[0]**2 + vpn[1]**2) 186 | 187 | assert (vp_norm > 0) 188 | assert (vpn_norm > 0) 189 | 190 | z = np.cross(vp, vpn)/(vp_norm * vpn_norm) 191 | z = min(max(z, -1.0), 1.0) 192 | wa += asin(z) 193 | 194 | return wa 195 | 196 | def homology_vector(self, path): 197 | L = len(self.obstacles) 198 | h = np.zeros((L, 1) ) 199 | for i in range(L): 200 | h[i, 0] = self.winding_angle(path, self.obstacles[i].representative_point) 201 | 202 | return h.reshape((L,)) 203 | -------------------------------------------------------------------------------- /gym_extensions/continuous/gym_navigation_2d/geometry_utils.py: -------------------------------------------------------------------------------- 1 | import scipy 2 | from scipy import stats 3 | import numpy as np 4 | from math import sqrt 5 | from itertools import product 6 | 7 | def closest_point_on_segment(p, a, b): 8 | lsqr = (a[0]-b[0])**2 + (a[1]-b[1])**2 9 | pa = p-a 10 | ba = b-a 11 | 12 | if (lsqr < 1e-15): 13 | return a 14 | 15 | t_opt = pa.dot(ba) / lsqr 16 | t_opt = max(min(t_opt, 1), 0) 17 | cp = a + t_opt * (b-a) 18 | return cp 19 | 20 | def point_to_segment_distance(p, a, b): 21 | cp = closest_point_on_segment(p, a, b) 22 | return np.linalg.norm(cp - p) 23 | 24 | def point_to_rectangle_distance(p, ca, wa, ha): 25 | 26 | if (abs(p-ca) < np.array([wa/2.0, ha/2.0])).all(): 27 | return 0. 28 | 29 | a1 = ca + np.array([wa/2.0, ha/2.0]) 30 | a2 = ca + np.array([wa/2.0, -ha/2.0]) 31 | a3 = ca + np.array([-wa/2.0, -ha/2.0]) 32 | a4 = ca + np.array([-wa/2.0, ha/2.0]) 33 | 34 | d1 = point_to_segment_distance(p, a1, a2) 35 | d2 = point_to_segment_distance(p, a2, a3) 36 | d3 = point_to_segment_distance(p, a3, a4) 37 | d4 = point_to_segment_distance(p, a4, a1) 38 | return min([d1, d2, d3, d4]) 39 | 40 | def point_is_on_left(a, b, c): 41 | """Returns true iff c is on the left of the infinite line ab""" 42 | return (a[0] - c[0]) * (b[1] - c[1]) > (a[1] - c[1]) * (b[0] - c[0]) 43 | 44 | def segments_intersect(a, b, c, d): 45 | """Returns true iff the line segments ab and cd intersect""" 46 | epsilon = 1e-10 47 | 48 | if point_to_segment_distance(a, c, d) < epsilon or point_to_segment_distance(b, c, d) < epsilon: 49 | return True 50 | 51 | if point_to_segment_distance(c, a, b) < epsilon or point_to_segment_distance(d, a, b) < epsilon: 52 | return True 53 | 54 | return point_is_on_left(a,b,c) != point_is_on_left(a,b,d) and point_is_on_left(c,d,a) != point_is_on_left(c,d,b) 55 | 56 | def rectangle_edges(z1,z2,z3,z4): 57 | yield (z1, z2) 58 | yield (z2, z3) 59 | yield (z3, z4) 60 | yield (z4, z1) 61 | 62 | def rectangle_vertices(c,w,h): 63 | yield c + np.array([w/2.0, h/2.0]) 64 | yield c + np.array([w/2.0, -h/2.0]) 65 | yield c + np.array([-w/2.0, -h/2.0]) 66 | yield c + np.array([-w/2.0, h/2.0]) 67 | 68 | 69 | def rectangles_intersect(ca,wa,ha, q1,q2,q3,q4): 70 | a1 = ca + np.array([wa/2.0, ha/2.0]) 71 | a2 = ca + np.array([wa/2.0, -ha/2.0]) 72 | a3 = ca + np.array([-wa/2.0, -ha/2.0]) 73 | a4 = ca + np.array([-wa/2.0, ha/2.0]) 74 | 75 | cq = (q1 + q2 + q3 + q4)/4.0 76 | cq_is_in_rectangle = abs(cq[0] - ca[0]) < wa/2.0 and abs(cq[1] - ca[1]) < ha/2.0 77 | 78 | segment_intersections = 
[segments_intersect(e1[0], e1[1], e2[0], e2[1]) for e1, e2 in product(rectangle_edges(a1,a2,a3,a4), rectangle_edges(q1,q2,q3,q4))] 79 | return any(segment_intersections) or cq_is_in_rectangle 80 | 81 | 82 | def rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb): 83 | a1 = ca + np.array([wa/2.0, ha/2.0]) 84 | a2 = ca + np.array([wa/2.0, -ha/2.0]) 85 | a3 = ca + np.array([-wa/2.0, -ha/2.0]) 86 | a4 = ca + np.array([-wa/2.0, ha/2.0]) 87 | 88 | b1 = cb + np.array([wb/2.0, hb/2.0]) 89 | b2 = cb + np.array([wb/2.0, -hb/2.0]) 90 | b3 = cb + np.array([-wb/2.0, -hb/2.0]) 91 | b4 = cb + np.array([-wb/2.0, hb/2.0]) 92 | 93 | for e1, e2 in product(rectangle_edges(a1,a2,a3,a4), rectangle_edges(b1,b2,b3,b4)): 94 | if segments_intersect(e1[0], e1[1], e2[0], e2[1]): 95 | return 0.0 96 | 97 | da1 = point_to_rectangle_distance(a1, cb, wb, hb) 98 | da2 = point_to_rectangle_distance(a2, cb, wb, hb) 99 | da3 = point_to_rectangle_distance(a3, cb, wb, hb) 100 | da4 = point_to_rectangle_distance(a4, cb, wb, hb) 101 | 102 | db1 = point_to_rectangle_distance(b1, ca, wa, ha) 103 | db2 = point_to_rectangle_distance(b2, ca, wa, ha) 104 | db3 = point_to_rectangle_distance(b3, ca, wa, ha) 105 | db4 = point_to_rectangle_distance(b4, ca, wa, ha) 106 | return min([da1, da2, da3, da4, db1, db2, db3, db4]) 107 | -------------------------------------------------------------------------------- /gym_extensions/continuous/gym_navigation_2d/gym_rendering.py: -------------------------------------------------------------------------------- 1 | """ 2 | 2D rendering framework 3 | """ 4 | from __future__ import division 5 | import os 6 | import six 7 | import sys 8 | 9 | if "Apple" in sys.version: 10 | if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ: 11 | os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib' 12 | # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite 13 | 14 | from gym.utils import reraise 15 | from gym import error 16 | 17 | try: 18 | import pyglet 19 | except ImportError as e: 20 | reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.") 21 | 22 | try: 23 | from pyglet import gl 24 | except ImportError as e: 25 | reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python '") 26 | 27 | import math 28 | import numpy as np 29 | 30 | RAD2DEG = 57.29577951308232 31 | 32 | def get_display(spec): 33 | """Convert a display specification (such as :0) into an actual Display 34 | object. 35 | 36 | Pyglet only supports multiple Displays on Linux. 37 | """ 38 | if spec is None: 39 | return None 40 | elif isinstance(spec, six.string_types): 41 | return pyglet.canvas.Display(spec) 42 | else: 43 | raise error.Error('Invalid display specification: {}. 
(Must be a string like :0 or None.)'.format(spec)) 44 | 45 | class Viewer(object): 46 | def __init__(self, width, height, display=None): 47 | display = get_display(display) 48 | 49 | self.width = width 50 | self.height = height 51 | self.window = pyglet.window.Window(width=width, height=height, display=display) 52 | self.window.on_close = self.window_closed_by_user 53 | self.geoms = [] 54 | self.onetime_geoms = [] 55 | self.transform = Transform() 56 | 57 | gl.glEnable(gl.GL_BLEND) 58 | gl.glBlendFunc(gl.GL_SRC_ALPHA, gl.GL_ONE_MINUS_SRC_ALPHA) 59 | 60 | def close(self): 61 | self.window.close() 62 | 63 | def window_closed_by_user(self): 64 | self.close() 65 | 66 | def set_bounds(self, left, right, bottom, top): 67 | assert right > left and top > bottom 68 | scalex = self.width/(right-left) 69 | scaley = self.height/(top-bottom) 70 | self.transform = Transform( 71 | translation=(-left*scalex, -bottom*scaley), 72 | scale=(scalex, scaley)) 73 | 74 | def add_geom(self, geom): 75 | self.geoms.append(geom) 76 | 77 | def add_onetime(self, geom): 78 | self.onetime_geoms.append(geom) 79 | 80 | def render(self, return_rgb_array=False): 81 | gl.glClearColor(1,1,1,1) 82 | self.window.clear() 83 | self.window.switch_to() 84 | self.window.dispatch_events() 85 | self.transform.enable() 86 | for geom in self.geoms: 87 | geom.render() 88 | for geom in self.onetime_geoms: 89 | geom.render() 90 | self.transform.disable() 91 | arr = None 92 | if return_rgb_array: 93 | buffer = pyglet.image.get_buffer_manager().get_color_buffer() 94 | image_data = buffer.get_image_data() 95 | arr = np.fromstring(image_data.data, dtype=np.uint8, sep='') 96 | # In https://github.com/openai/gym-http-api/issues/2, we 97 | # discovered that someone using Xmonad on Arch was having 98 | # a window of size 598 x 398, though a 600 x 400 window 99 | # was requested. (Guess Xmonad was preserving a pixel for 100 | # the boundary.) So we use the buffer height/width rather 101 | # than the requested one. 
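# Concrete illustration (hypothetical sizes, echoing the 598 x 398 case mentioned above):
# with buffer.width == 598 and buffer.height == 398, the flat uint8 buffer is reshaped below
# into a (398, 598, 4) RGBA array; the [::-1, :, 0:3] slice then flips the rows (the GL
# framebuffer origin is the bottom-left corner) and drops the alpha channel, so render()
# returns a (398, 598, 3) RGB image.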
102 | arr = arr.reshape(buffer.height, buffer.width, 4) 103 | arr = arr[::-1,:,0:3] 104 | self.window.flip() 105 | self.onetime_geoms = [] 106 | return arr 107 | 108 | # Convenience 109 | def draw_circle(self, radius=10, res=30, filled=True, **attrs): 110 | geom = make_circle(radius=radius, res=res, filled=filled) 111 | _add_attrs(geom, attrs) 112 | self.add_onetime(geom) 113 | return geom 114 | 115 | def draw_polygon(self, v, filled=True, **attrs): 116 | geom = make_polygon(v=v, filled=filled) 117 | _add_attrs(geom, attrs) 118 | self.add_onetime(geom) 119 | return geom 120 | 121 | def draw_polyline(self, v, **attrs): 122 | geom = make_polyline(v=v) 123 | _add_attrs(geom, attrs) 124 | self.add_onetime(geom) 125 | return geom 126 | 127 | def draw_line(self, start, end, **attrs): 128 | geom = Line(start, end) 129 | _add_attrs(geom, attrs) 130 | self.add_onetime(geom) 131 | return geom 132 | 133 | def get_array(self): 134 | self.window.flip() 135 | image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data() 136 | self.window.flip() 137 | arr = np.fromstring(image_data.data, dtype=np.uint8, sep='') 138 | arr = arr.reshape(self.height, self.width, 4) 139 | return arr[::-1,:,0:3] 140 | 141 | def _add_attrs(geom, attrs): 142 | if "color" in attrs: 143 | geom.set_color(*attrs["color"]) 144 | if "linewidth" in attrs: 145 | geom.set_linewidth(attrs["linewidth"]) 146 | 147 | class Geom(object): 148 | def __init__(self): 149 | self._color=Color((0, 0, 0, 1.0)) 150 | self.attrs = [self._color] 151 | def render(self): 152 | for attr in reversed(self.attrs): 153 | attr.enable() 154 | self.render1() 155 | for attr in self.attrs: 156 | attr.disable() 157 | def render1(self): 158 | raise NotImplementedError 159 | def add_attr(self, attr): 160 | self.attrs.append(attr) 161 | def set_color(self, r, g, b): 162 | self._color.vec4 = (r, g, b, 1) 163 | 164 | class Attr(object): 165 | def enable(self): 166 | raise NotImplementedError 167 | def disable(self): 168 | pass 169 | 170 | class Transform(Attr): 171 | def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)): 172 | self.set_translation(*translation) 173 | self.set_rotation(rotation) 174 | self.set_scale(*scale) 175 | def enable(self): 176 | gl.glPushMatrix() 177 | gl.glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc ppint 178 | gl.glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0) 179 | gl.glScalef(self.scale[0], self.scale[1], 1) 180 | def disable(self): 181 | gl.glPopMatrix() 182 | def set_translation(self, newx, newy): 183 | self.translation = (float(newx), float(newy)) 184 | def set_rotation(self, new): 185 | self.rotation = float(new) 186 | def set_scale(self, newx, newy): 187 | self.scale = (float(newx), float(newy)) 188 | 189 | class Color(Attr): 190 | def __init__(self, vec4): 191 | self.vec4 = vec4 192 | def enable(self): 193 | gl.glColor4f(*self.vec4) 194 | 195 | class LineStyle(Attr): 196 | def __init__(self, style): 197 | self.style = style 198 | def enable(self): 199 | gl.glEnable(gl.GL_LINE_STIPPLE) 200 | gl.glLineStipple(1, self.style) 201 | def disable(self): 202 | gl.glDisable(gl.GL_LINE_STIPPLE) 203 | 204 | class LineWidth(Attr): 205 | def __init__(self, stroke): 206 | self.stroke = stroke 207 | def enable(self): 208 | gl.glLineWidth(self.stroke) 209 | 210 | class Point(Geom): 211 | def __init__(self): 212 | Geom.__init__(self) 213 | def render1(self): 214 | gl.glBegin(gl.GL_POINTS) # draw point 215 | gl.glVertex3f(0.0, 0.0, 0.0) 216 | gl.glEnd() 217 | 218 | class 
FilledPolygon(Geom): 219 | def __init__(self, v): 220 | Geom.__init__(self) 221 | self.v = v 222 | def render1(self): 223 | if len(self.v) == 4 : gl.glBegin(gl.GL_QUADS) 224 | elif len(self.v) > 4 : gl.glBegin(gl.GL_POLYGON) 225 | else: gl.glBegin(gl.GL_TRIANGLES) 226 | for p in self.v: 227 | gl.glVertex3f(p[0], p[1],0) # draw each vertex 228 | gl.glEnd() 229 | 230 | def make_circle(radius=10, res=30, filled=True): 231 | points = [] 232 | for i in range(res): 233 | ang = 2*math.pi*i / res 234 | points.append((math.cos(ang)*radius, math.sin(ang)*radius)) 235 | if filled: 236 | return FilledPolygon(points) 237 | else: 238 | return PolyLine(points, True) 239 | 240 | def make_polygon(v, filled=True): 241 | if filled: return FilledPolygon(v) 242 | else: return PolyLine(v, True) 243 | 244 | def make_polyline(v): 245 | return PolyLine(v, False) 246 | 247 | def make_capsule(length, width): 248 | l, r, t, b = 0, length, width/2, -width/2 249 | box = make_polygon([(l,b), (l,t), (r,t), (r,b)]) 250 | circ0 = make_circle(width/2) 251 | circ1 = make_circle(width/2) 252 | circ1.add_attr(Transform(translation=(length, 0))) 253 | geom = Compound([box, circ0, circ1]) 254 | return geom 255 | 256 | class Compound(Geom): 257 | def __init__(self, gs): 258 | Geom.__init__(self) 259 | self.gs = gs 260 | for g in self.gs: 261 | g.attrs = [a for a in g.attrs if not isinstance(a, Color)] 262 | def render1(self): 263 | for g in self.gs: 264 | g.render() 265 | 266 | class PolyLine(Geom): 267 | def __init__(self, v, close): 268 | Geom.__init__(self) 269 | self.v = v 270 | self.close = close 271 | self.linewidth = LineWidth(1) 272 | self.add_attr(self.linewidth) 273 | def render1(self): 274 | gl.glBegin(gl.GL_LINE_LOOP if self.close else gl.GL_LINE_STRIP) 275 | for p in self.v: 276 | gl.glVertex3f(p[0], p[1],0) # draw each vertex 277 | gl.glEnd() 278 | def set_linewidth(self, x): 279 | self.linewidth.stroke = x 280 | 281 | class Line(Geom): 282 | def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)): 283 | Geom.__init__(self) 284 | self.start = start 285 | self.end = end 286 | self.linewidth = LineWidth(1) 287 | self.add_attr(self.linewidth) 288 | 289 | def render1(self): 290 | gl.glBegin(gl.GL_LINES) 291 | gl.glVertex2f(*self.start) 292 | gl.glVertex2f(*self.end) 293 | gl.glEnd() 294 | 295 | class Image(Geom): 296 | def __init__(self, fname, width, height): 297 | Geom.__init__(self) 298 | self.width = width 299 | self.height = height 300 | img = pyglet.image.load(fname) 301 | self.img = img 302 | self.flip = False 303 | def render1(self): 304 | self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height) 305 | 306 | # ================================================================ 307 | 308 | class SimpleImageViewer(object): 309 | def __init__(self, display=None): 310 | self.window = None 311 | self.isopen = False 312 | self.display = display 313 | def imshow(self, arr): 314 | if self.window is None: 315 | height, width, channels = arr.shape 316 | self.window = pyglet.window.Window(width=width, height=height, display=self.display) 317 | self.width = width 318 | self.height = height 319 | self.isopen = True 320 | assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong number shape" 321 | image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3) 322 | self.window.clear() 323 | self.window.switch_to() 324 | self.window.dispatch_events() 325 | image.blit(0,0) 326 | self.window.flip() 327 | def close(self): 328 | if 
self.isopen: 329 | self.window.close() 330 | self.isopen = False 331 | def __del__(self): 332 | self.close() 333 | -------------------------------------------------------------------------------- /gym_extensions/continuous/gym_navigation_2d/image_based_navigation.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import error, spaces, utils 3 | from gym.utils import seeding 4 | from gym.spaces import Box, Tuple 5 | 6 | from math import pi, cos, sin 7 | import numpy as np 8 | import logging 9 | from skimage.draw import circle 10 | from skimage.transform import resize 11 | from .range_based_navigation import StateBasedMDPNavigation2DEnv 12 | 13 | class ImageBasedNavigation2DEnv(StateBasedMDPNavigation2DEnv): 14 | logger = logging.getLogger(__name__) 15 | metadata = {'render.modes': ['human']} 16 | 17 | def __init__(self, *args, **kwargs): 18 | StateBasedMDPNavigation2DEnv.__init__(self, *args, **kwargs) 19 | self.obs_img_shape = (160, 120, 3) 20 | 21 | self.observation_space = Box(0., 255., (self.obs_img_shape[1], self.obs_img_shape[0], 3)) 22 | 23 | def set_circular_observation(self, img, col_center, row_center, radius, color=(0,0,0)): 24 | rr,cc = circle(row_center, col_center, radius) 25 | # make sure within bounds of img 26 | rr[rr<0] = 0 27 | rr[rr>img.shape[0]-1] = img.shape[0]-1 28 | cc[cc<0] = 0 29 | cc[cc>img.shape[1]-1] = img.shape[1]-1 30 | img[rr,cc] = color 31 | return img 32 | 33 | def _get_observation(self, state): 34 | image = self.world.image.copy() 35 | 36 | state_col = int(self.state[0]) 37 | state_row = int(self.state[1]) 38 | 39 | dest_col = int(self.destination[0]) 40 | dest_row = int(self.destination[1]) 41 | dest_rad = int(self.destination_tolerance_range) 42 | image = self.set_circular_observation(image, state_col, state_row, 5, (0,0,0)) 43 | image = self.set_circular_observation(image, dest_col, dest_row, dest_rad, (255,0,0)) 44 | image = image[::-1,:,:] 45 | image = resize(image, self.obs_img_shape[0:2][::-1]) 46 | 47 | return image 48 | -------------------------------------------------------------------------------- /gym_extensions/continuous/gym_navigation_2d/range_based_navigation.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import error, spaces, utils 3 | from gym.utils import seeding 4 | from gym.spaces import Box, Tuple 5 | from .env_generator import EnvironmentCollection 6 | 7 | from math import pi, cos, sin 8 | import numpy as np 9 | 10 | #from gym.envs.classic_control.rendering import make_circle, Transform 11 | from gym_extensions.continuous.gym_navigation_2d import gym_rendering 12 | import os 13 | import logging 14 | 15 | class LimitedRangeBasedPOMDPNavigation2DEnv(gym.Env): 16 | logger = logging.getLogger(__name__) 17 | metadata = {'render.modes': ['human']} 18 | 19 | def __init__(self, 20 | worlds_pickle_filename=os.path.join(os.path.dirname(__file__), "assets", "worlds_640x480_v0.pkl"), 21 | world_idx=0, 22 | initial_position = np.array([-20.0, -20.0]), 23 | destination = np.array([520.0, 400.0]), 24 | max_observation_range = 100.0, 25 | destination_tolerance_range=20.0, 26 | add_self_position_to_observation=False, 27 | add_goal_position_to_observation=False): 28 | 29 | worlds = EnvironmentCollection() 30 | worlds.read(worlds_pickle_filename) 31 | 32 | self.world = worlds.map_collection[world_idx] 33 | self.set_destination(destination) 34 | 35 | assert not (self.destination is None) 36 | self.init_position = 
initial_position 37 | self.state = self.init_position.copy() 38 | 39 | 40 | self.max_observation_range = max_observation_range 41 | self.destination_tolerance_range = destination_tolerance_range 42 | self.viewer = None 43 | self.num_beams = 16 44 | self.max_speed = 5 45 | self.add_self_position_to_observation = add_self_position_to_observation 46 | self.add_goal_position_to_observation = add_goal_position_to_observation 47 | 48 | 49 | low = np.array([0.0, 0.0]) 50 | high = np.array([self.max_speed, 2*pi]) 51 | self.action_space = Box(low, high)#Tuple( (Box(0.0, self.max_speed, (1,)), Box(0.0, 2*pi, (1,))) ) 52 | low = [-1.0] * self.num_beams 53 | high = [self.max_observation_range] * self.num_beams 54 | if add_self_position_to_observation: 55 | low.extend([-10000., -10000.]) # x and y coords 56 | high.extend([10000., 10000.]) 57 | if add_goal_position_to_observation: 58 | low.extend([-10000., -10000.]) # x and y coords 59 | high.extend([10000., 10000.]) 60 | 61 | 62 | self.observation_space = Box(np.array(low), np.array(high)) 63 | self.observation = [] 64 | 65 | def set_destination(self, destination): 66 | self.destination = destination 67 | 68 | def _get_observation(self, state): 69 | delta_angle = 2*pi/self.num_beams 70 | ranges = [self.world.raytrace(self.state, 71 | i * delta_angle, 72 | self.max_observation_range) for i in range(self.num_beams)] 73 | 74 | ranges = np.array(ranges) 75 | if self.add_self_position_to_observation: 76 | ranges = np.concatenate([ranges, self.state]) 77 | if self.add_goal_position_to_observation: 78 | ranges = np.concatenate([ranges, self.destination]) 79 | return ranges 80 | 81 | def _step(self, action): 82 | old_state = self.state.copy() 83 | v = action[0] 84 | theta = action[1] 85 | dx = v*cos(theta) 86 | dy = v*sin(theta) 87 | 88 | self.state += np.array([dx, dy]) 89 | 90 | reward = -1 # minus 1 for every timestep you're not in the goal 91 | done = False 92 | info = {} 93 | 94 | if np.linalg.norm(self.destination - self.state) < self.destination_tolerance_range: 95 | reward = 20 # for reaching the goal 96 | done = True 97 | 98 | if not self.world.point_is_in_free_space(self.state[0], self.state[1], epsilon=0.25): 99 | reward = -5 # for hitting an obstacle 100 | 101 | if not self.world.segment_is_in_free_space(old_state[0], old_state[1], 102 | self.state[0], self.state[1], 103 | epsilon=0.25): 104 | reward = -5 # for hitting an obstacle 105 | 106 | self.observation = self._get_observation(self.state) 107 | return self.observation, reward, done, info 108 | 109 | 110 | def _reset(self): 111 | self.state = self.init_position 112 | return self._get_observation(self.state) 113 | 114 | def _plot_state(self, viewer, state): 115 | polygon = gym_rendering.make_circle(radius=5, res=30, filled=True) 116 | state_tr = gym_rendering.Transform(translation=(state[0], state[1])) 117 | polygon.add_attr(state_tr) 118 | viewer.add_onetime(polygon) 119 | 120 | 121 | def _plot_observation(self, viewer, state, observation): 122 | delta_angle = 2*pi/self.num_beams 123 | for i in range(len(observation)): 124 | r = observation[i] 125 | if r < 0: 126 | r = self.max_observation_range 127 | 128 | theta = i*delta_angle 129 | start = (state[0], state[1]) 130 | end = (state[0] + r*cos(theta), state[1] + r*sin(theta)) 131 | 132 | line = gym_rendering.Line(start=start, end=end) 133 | line.set_color(.5, 0.5, 0.5) 134 | viewer.add_onetime(line) 135 | 136 | def _append_elements_to_viewer(self, viewer, 137 | screen_width, 138 | screen_height, 139 | obstacles, 140 | destination=None, 141 | 
destination_tolerance_range=None): 142 | 143 | viewer.set_bounds(left=-100, right=screen_width+100, bottom=-100, top=screen_height+100) 144 | 145 | L = len(obstacles) 146 | for i in range(L): 147 | 148 | obs = obstacles[i] 149 | for c,w,h in zip(obs.rectangle_centers, obs.rectangle_widths, obs.rectangle_heights): 150 | l = -w/2.0 151 | r = w/2.0 152 | t = h/2.0 153 | b = -h/2.0 154 | 155 | rectangle = gym_rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)]) 156 | tr = gym_rendering.Transform(translation=(c[0], c[1])) 157 | rectangle.add_attr(tr) 158 | rectangle.set_color(.8,.6,.4) 159 | viewer.add_geom(rectangle) 160 | 161 | 162 | if not (destination is None): 163 | tr = gym_rendering.Transform(translation=(destination[0], destination[1])) 164 | polygon = gym_rendering.make_circle(radius=destination_tolerance_range, res=30, filled=True) 165 | polygon.add_attr(tr) 166 | polygon.set_color(1.0, 0., 0.) 167 | viewer.add_geom(polygon) 168 | 169 | def _render(self, mode='human', close=False): 170 | 171 | if close: 172 | if self.viewer is not None: 173 | self.viewer.close() 174 | self.viewer = None 175 | return 176 | 177 | screen_width = (self.world.x_range[1] - self.world.x_range[0]) 178 | screen_height = (self.world.y_range[1] - self.world.y_range[0]) 179 | 180 | if self.viewer is None: 181 | self.viewer = gym_rendering.Viewer(screen_width, screen_height) 182 | self._append_elements_to_viewer(self.viewer, 183 | screen_width, 184 | screen_height, 185 | obstacles=self.world.obstacles, 186 | destination=self.destination, 187 | destination_tolerance_range=self.destination_tolerance_range) 188 | 189 | self._plot_state(self.viewer, self.state) 190 | self._plot_observation(self.viewer, self.state, self.observation) 191 | 192 | return self.viewer.render(return_rgb_array = mode=='rgb_array') 193 | 194 | class StateBasedMDPNavigation2DEnv(LimitedRangeBasedPOMDPNavigation2DEnv): 195 | logger = logging.getLogger(__name__) 196 | def __init__(self, *args, **kwargs): 197 | LimitedRangeBasedPOMDPNavigation2DEnv.__init__(self, *args, **kwargs) 198 | low = [-float('inf'), -float('inf'), 0.0, 0.0] 199 | high = [float('inf'), float('inf'), float('inf'), 2*pi] 200 | 201 | if self.add_goal_position_to_observation: 202 | low.extend([-10000., -10000.]) # x and y coords 203 | high.extend([10000., 10000.]) 204 | 205 | self.observation_space = Box(np.array(low), np.array(high)) 206 | 207 | def _plot_observation(self, viewer, state, observation): 208 | pass 209 | 210 | def _get_observation(self, state): 211 | # return state 212 | dist_to_closest_obstacle, absolute_angle_to_closest_obstacle = self.world.range_and_bearing_to_closest_obstacle(state[0], state[1]) 213 | obs = np.array([state[0], state[1], dist_to_closest_obstacle, absolute_angle_to_closest_obstacle]) 214 | if self.add_goal_position_to_observation: 215 | obs = np.concatenate([obs, self.destination]) 216 | return obs 217 | -------------------------------------------------------------------------------- /gym_extensions/continuous/gym_navigation_2d/rrt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | import time 4 | import pickle 5 | import numpy as np 6 | import random 7 | 8 | from itertools import product 9 | from math import cos, sin, pi, sqrt 10 | import copy 11 | 12 | class State(object): 13 | def __init__(self, x, y, parent): 14 | self.x = x 15 | self.y = y 16 | self.parent = parent 17 | self.children = [] 18 | 19 | def __hash__(self): 20 | return hash((self.x, self.y)) 21 | 22 | 
def euclidean_distance(self, state): 23 | assert (state) 24 | return sqrt((state.x - self.x)**2 + (state.y - self.y)**2) 25 | 26 | 27 | class RRT(object): 28 | def __init__(self, world): 29 | self.world = world 30 | 31 | def state_is_free(self, state): 32 | return self.world.point_is_in_free_space(state.x, state.y, epsilon=0.2) 33 | 34 | def sample_state(self): 35 | x = random.uniform(self.world.x_range[0], self.world.x_range[1]) 36 | y = random.uniform(self.world.y_range[0], self.world.y_range[1]) 37 | return State(x, y, None) 38 | 39 | 40 | def _follow_parent_pointers(self, state): 41 | """ 42 | Returns the path [start_state, ..., destination_state] by following the 43 | parent pointers. 44 | """ 45 | 46 | curr_ptr = state 47 | path = [state] 48 | 49 | while curr_ptr is not None: 50 | path.append(curr_ptr) 51 | curr_ptr = curr_ptr.parent 52 | 53 | # return a reverse copy of the path (so that first state is starting state) 54 | return path[::-1] 55 | 56 | 57 | def find_closest_state(self, tree_nodes, state): 58 | min_dist = float("Inf") 59 | closest_state = None 60 | for node in tree_nodes: 61 | dist = node.euclidean_distance(state) 62 | if dist < min_dist: 63 | closest_state = node 64 | min_dist = dist 65 | 66 | return closest_state 67 | 68 | def steer_towards(self, s_nearest, s_rand, max_radius): 69 | """ 70 | Returns a new state s_new whose coordinates x and y 71 | are decided as follows: 72 | 73 | If s_rand is within a circle of max_radius from s_nearest 74 | then s_new.x = s_rand.x and s_new.y = s_rand.y 75 | 76 | Otherwise, s_rand is farther than max_radius from s_nearest. 77 | In this case we place s_new on the line from s_nearest to 78 | s_rand, at a distance of max_radius away from s_nearest. 79 | 80 | """ 81 | 82 | vx = float(s_rand.x - s_nearest.x) 83 | vy = float(s_rand.y - s_nearest.y) 84 | v = sqrt(vx**2 + vy**2) 85 | 86 | x = 0 87 | y = 0 88 | 89 | if v < max_radius: 90 | x = s_rand.x 91 | y = s_rand.y 92 | elif v > 1e-10: 93 | x = s_nearest.x + vx/v * max_radius 94 | y = s_nearest.y + vy/v * max_radius 95 | 96 | s_new = State(x, y, s_nearest) 97 | return s_new 98 | 99 | 100 | def segment_is_obstacle_free(self, s_from, s_to): 101 | """ 102 | Returns true iff the line path from s_from to s_to 103 | is free 104 | """ 105 | assert (self.state_is_free(s_from)) 106 | 107 | if not (self.state_is_free(s_to)): 108 | return False 109 | 110 | return self.world.segment_is_in_free_space(s_from.x, s_from.y, s_to.x, s_to.y) 111 | 112 | 113 | def plan(self, start_state, dest_state, max_num_steps, max_steering_radius, dest_reached_radius): 114 | """ 115 | Returns a path as a sequence of states [start_state, ..., dest_state] 116 | if dest_state is reachable from start_state. Otherwise returns [start_state]. 117 | Assume both source and destination are in free space. 
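Usage sketch (the start/goal coordinates and radii below are hypothetical values,
added only for illustration):

    rrt = RRT(world)
    path, tree_nodes = rrt.plan(State(50.0, 50.0, None), State(600.0, 400.0, None),
                                max_num_steps=2000, max_steering_radius=30.0,
                                dest_reached_radius=15.0)

A path longer than [start_state] is returned only if the tree reaches the goal
within max_num_steps expansions.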
118 | """ 119 | assert (self.state_is_free(start_state)) 120 | assert (self.state_is_free(dest_state)) 121 | 122 | # The set containing the nodes of the tree 123 | tree_nodes = set() 124 | tree_nodes.add(start_state) 125 | 126 | # image to be used to display the tree 127 | img = np.copy(self.world) 128 | 129 | plan = [start_state] 130 | 131 | for step in xrange(max_num_steps): 132 | 133 | s_rand = self.sample_state() 134 | s_nearest = self.find_closest_state(tree_nodes, s_rand) 135 | s_new = self.steer_towards(s_nearest, s_rand, max_steering_radius) 136 | 137 | if self.segment_is_obstacle_free(s_nearest, s_new): 138 | tree_nodes.add(s_new) 139 | s_nearest.children.append(s_new) 140 | 141 | # If we approach the destination within a few pixels 142 | # we're done. Return the path. 143 | if s_new.euclidean_distance(dest_state) < dest_reached_radius: 144 | dest_state.parent = s_new 145 | plan = self._follow_parent_pointers(dest_state) 146 | return plan, tree_nodes 147 | 148 | return [start_state], tree_nodes 149 | 150 | def smooth(self, path, alpha=1.5, rate=0.01, max_iterations=400, min_safe_distance=1.5): 151 | path = copy.deepcopy(path) 152 | old_path = np.array([ [state.x, state.y] for state in path]) 153 | new_path = old_path.copy() 154 | it = 0 155 | 156 | while True: 157 | 158 | gradE = (new_path - old_path) 159 | for i in xrange(1, len(path)-1): 160 | gradE[i, :] -= alpha*(new_path[i+1, :] + new_path[i-1, :] - 2*new_path[i, :]) 161 | 162 | gradE[0, :] = 0 163 | gradE[-1, :] = 0 164 | 165 | proposed_new_path = new_path - rate*gradE 166 | 167 | safe_idx = [self.world.segment_is_in_free_space(proposed_new_path[i-1][0], 168 | proposed_new_path[i-1][1], 169 | proposed_new_path[i][0], 170 | proposed_new_path[i][1]) and \ 171 | 172 | self.world.segment_is_in_free_space(proposed_new_path[i][0], 173 | proposed_new_path[i][1], 174 | proposed_new_path[i+1][0], 175 | proposed_new_path[i+1][1]) \ 176 | 177 | for i in xrange(1, proposed_new_path.shape[0]-1)] 178 | 179 | 180 | improving_idx = [self.world.segment_distance_from_obstacles(proposed_new_path[i][0], 181 | proposed_new_path[i][1], 182 | proposed_new_path[i+1][0], 183 | proposed_new_path[i+1][1]) > 184 | 185 | self.world.segment_distance_from_obstacles(old_path[i][0], 186 | old_path[i][1], 187 | old_path[i+1][0], 188 | old_path[i+1][1]) 189 | 190 | for i in xrange(1, proposed_new_path.shape[0]-1)] 191 | 192 | safe_idx = np.array([False] + safe_idx + [False]) 193 | improving_idx = np.array([False] + improving_idx + [False]) 194 | safe_idx = safe_idx + improving_idx 195 | 196 | if not (any(safe_idx) and it < max_iterations): 197 | break 198 | 199 | new_path[safe_idx, :] = proposed_new_path[safe_idx, :] 200 | it += 1 201 | 202 | for i in xrange(1, len(path)-1): 203 | path[i].x = new_path[i, 0] 204 | path[i].y = new_path[i, 1] 205 | 206 | return path 207 | -------------------------------------------------------------------------------- /gym_extensions/continuous/gym_navigation_2d/test_env_generator.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from env_generator import EnvironmentCollection, EnvironmentGenerator 4 | from env_utils import Environment 5 | from geometry_utils import * 6 | 7 | from math import sqrt 8 | import os 9 | 10 | class TestDistanceMethods(unittest.TestCase): 11 | 12 | def test_point_to_segment(self): 13 | p = np.array([0,0]) 14 | a = np.array([1,1]) 15 | b = np.array([2,2]) 16 | self.assertEqual(point_to_segment_distance(p,a,b), sqrt(2)) 17 
| 18 | 19 | def test_point_to_segment2(self): 20 | p = np.array([3,3]) 21 | a = np.array([1,1]) 22 | b = np.array([2,2]) 23 | self.assertEqual(point_to_segment_distance(p,a,b), sqrt(2)) 24 | 25 | 26 | def test_point_to_segment3(self): 27 | p = np.array([1.5,1.5]) 28 | a = np.array([1,1]) 29 | b = np.array([2,2]) 30 | self.assertEqual(point_to_segment_distance(p,a,b), 0.) 31 | 32 | 33 | def test_point_to_segment4(self): 34 | p = np.array([1,0]) 35 | a = np.array([1,1]) 36 | b = np.array([2,2]) 37 | self.assertEqual(point_to_segment_distance(p,a,b), 1) 38 | 39 | 40 | def test_point_to_rectangle_distance(self): 41 | p = np.array([0, 0]) 42 | c = np.array([1, 1]) 43 | w = 1.0 44 | h = 1.0 45 | self.assertEqual(point_to_rectangle_distance(p, c, w, h), sqrt(2*(0.5)**2)) 46 | 47 | 48 | def test_point_to_rectangle_distance2(self): 49 | p = np.array([1, 1]) 50 | c = np.array([1, 1]) 51 | w = 1.0 52 | h = 1.0 53 | self.assertEqual(point_to_rectangle_distance(p, c, w, h), 0) 54 | 55 | def test_point_to_rectangle_distance3(self): 56 | p = np.array([0.6, 0.6]) 57 | c = np.array([1, 1]) 58 | w = 1.0 59 | h = 1.0 60 | self.assertEqual(point_to_rectangle_distance(p, c, w, h), 0) 61 | 62 | p = np.array([1.4, 1.4]) 63 | c = np.array([1, 1]) 64 | w = 1.0 65 | h = 1.0 66 | self.assertEqual(point_to_rectangle_distance(p, c, w, h), 0) 67 | 68 | 69 | def test_rectangle_to_rectangle_distance(self): 70 | ca = np.array([1, 1]) 71 | wa = 1.0 72 | ha = 1.0 73 | 74 | cb = np.array([0, 0]) 75 | wb = 1.0 76 | hb = 1.0 77 | 78 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 0) 79 | 80 | 81 | def test_rectangle_to_rectangle_distance2(self): 82 | ca = np.array([0, 2]) 83 | wa = 2.0 84 | ha = 2.0 85 | 86 | cb = np.array([0, 0]) 87 | wb = 1.0 88 | hb = 1.0 89 | 90 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 0.5) 91 | 92 | 93 | def test_rectangle_to_rectangle_distance3(self): 94 | ca = np.array([0, 2]) 95 | wa = 2.0 96 | ha = 2.0 97 | 98 | cb = np.array([0, 0]) 99 | wb = 1.0 100 | hb = 1.0 101 | 102 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 0.5) 103 | 104 | 105 | def test_rectangle_to_rectangle_distance4(self): 106 | ca = np.array([0, 0]) 107 | wa = 2.0 108 | ha = 2.0 109 | 110 | cb = np.array([0, 0]) 111 | wb = 1.0 112 | hb = 1.0 113 | 114 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 0.) 115 | 116 | 117 | def test_rectangle_to_rectangle_distance5(self): 118 | ca = np.array([-1, 0]) 119 | wa = 1.0 120 | ha = 1.0 121 | 122 | cb = np.array([0, 0]) 123 | wb = 1.0 124 | hb = 1.0 125 | 126 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 0.) 127 | 128 | 129 | def test_rectangle_to_rectangle_distance6(self): 130 | ca = np.array([-2, 0]) 131 | wa = 1.0 132 | ha = 1.0 133 | 134 | cb = np.array([0, 0]) 135 | wb = 1.0 136 | hb = 1.0 137 | 138 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 1.) 
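# Worked check, added for clarity (the numbers simply restate the diagonal case exercised by
# the next test): two 1x1 squares centered at (-2, -2) and (0, 0) have their closest corners
# at (-1.5, -1.5) and (-0.5, -0.5), so the gap is sqrt(1**2 + 1**2) = sqrt(2), which matches
# the expected value sqrt(8) - 2*sqrt(2*0.5**2) = 2*sqrt(2) - sqrt(2).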
139 | 140 | 141 | def test_rectangle_to_rectangle_distance7(self): 142 | ca = np.array([-2, -2]) 143 | wa = 1.0 144 | ha = 1.0 145 | 146 | cb = np.array([0, 0]) 147 | wb = 1.0 148 | hb = 1.0 149 | 150 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), sqrt(8) - 2*sqrt(2*0.5**2)) 151 | 152 | def test_rectangle_to_rectangle_distance8(self): 153 | ca = np.array([45.41472479, 42.0288299]) 154 | wa = 1.63125936059 155 | ha = 6.08773739678 156 | 157 | cb = np.array([45.70077727, 41.39116316]) 158 | wb = 6.98818845647 159 | hb = 3.83908874358 160 | 161 | self.assertEqual(rectangle_to_rectangle_distance(ca, cb, wa, wb, ha, hb), 0) 162 | 163 | 164 | def test_rectangle_to_object_distance(self): 165 | ca = np.array([-2, -2]) 166 | wa = 1.0 167 | ha = 1.0 168 | 169 | obs = Obstacle(ca,wa,ha) 170 | 171 | cb = np.array([0, 0]) 172 | wb = 1.0 173 | hb = 1.0 174 | 175 | self.assertEqual(obs.distance_to_rectangle(cb, wb, hb), sqrt(8) - 2*sqrt(2*0.5**2)) 176 | 177 | 178 | def test_rectangle_to_object_distance2(self): 179 | ca = np.array([-2, -2]) 180 | wa = 1.0 181 | ha = 1.0 182 | obs = Obstacle(ca,wa,ha) 183 | 184 | obs.append(np.array([2,2]), 1, 1) 185 | 186 | cb = np.array([0, 0]) 187 | wb = 1.0 188 | hb = 1.0 189 | 190 | self.assertEqual(obs.distance_to_rectangle(cb, wb, hb), sqrt(8) - 2*sqrt(2*0.5**2)) 191 | 192 | 193 | def test_rectangle_to_object_distance3(self): 194 | ca = np.array([-2, -2]) 195 | wa = 1.0 196 | ha = 1.0 197 | obs = Obstacle(ca,wa,ha) 198 | 199 | obs.append(np.array([2,2]), 1, 1) 200 | obs.append(np.array([1,1]), 1, 1) 201 | 202 | cb = np.array([0, 0]) 203 | wb = 1.0 204 | hb = 1.0 205 | 206 | self.assertEqual(obs.distance_to_rectangle(cb, wb, hb), 0) 207 | 208 | 209 | def test_object_to_object_distance(self): 210 | ca = np.array([-2, -2]) 211 | wa = 1.0 212 | ha = 1.0 213 | obs = Obstacle(ca,wa,ha) 214 | obs.append(np.array([2,2]), 1, 1) 215 | obs.append(np.array([1,1]), 1, 1) 216 | 217 | cb = np.array([0, 0]) 218 | wb = 1.0 219 | hb = 1.0 220 | obs2 = Obstacle(cb,wb,hb) 221 | 222 | self.assertEqual(obs.distance_to_obstacle(obs2), 0) 223 | 224 | 225 | def test_object_to_object_distance2(self): 226 | ca = np.array([-2, -2]) 227 | wa = 1.0 228 | ha = 1.0 229 | obs = Obstacle(ca,wa,ha) 230 | obs.append(np.array([2,2]), 1, 1) 231 | obs.append(np.array([1,1]), 1, 1) 232 | 233 | cb = np.array([4, 2]) 234 | wb = 1.0 235 | hb = 1.0 236 | obs2 = Obstacle(cb,wb,hb) 237 | obs2.append(np.array([1,1]), 1, 1) 238 | 239 | self.assertEqual(obs.distance_to_obstacle(obs2), 0) 240 | 241 | 242 | def test_object_to_object_distance3(self): 243 | ca = np.array([-2, -2]) 244 | wa = 1.0 245 | ha = 1.0 246 | obs = Obstacle(ca,wa,ha) 247 | obs.append(np.array([2,2]), 1, 1) 248 | obs.append(np.array([1,1]), 1, 1) 249 | 250 | cb = np.array([4, 2]) 251 | wb = 1.0 252 | hb = 1.0 253 | obs2 = Obstacle(cb,wb,hb) 254 | obs2.append(np.array([10,10]), 1, 1) 255 | obs2.append(np.array([0,2.1]), 1, 1) 256 | 257 | self.assertAlmostEqual(obs.distance_to_obstacle(obs2), 0.1) 258 | 259 | 260 | def test_merge_rectangles(self): 261 | centers = np.array([[-2,-2], [2,2], [0,0], [10,10], [11,11]]) 262 | widths = np.array([2,2,2,1,1]).reshape((len(centers),1)) 263 | heights = np.array([2,2,2,1,1]).reshape((len(centers),1)) 264 | 265 | 266 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5]) 267 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2) 268 | 269 | self.assertEqual(len(obstacles), 2) 270 | 271 | 272 | def test_merge_rectangles2(self): 273 | centers = 
np.array([[-2,-2], [2,2], [0,0], [11,11], [10,10]]) 274 | widths = np.array([2,2,2,1,1]).reshape((len(centers),1)) 275 | heights = np.array([2,2,2,1,1]).reshape((len(centers),1)) 276 | 277 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5]) 278 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2) 279 | 280 | self.assertEqual(len(obstacles), 2) 281 | 282 | 283 | def test_merge_rectangles3(self): 284 | centers = np.array([[-2,-2], [2,2], [0,0], [10,10], [12,12], [11,11]]) 285 | widths = np.array([2,2,2,1,1,1]).reshape((len(centers),1)) 286 | heights = np.array([2,2,2,1,1,1]).reshape((len(centers),1)) 287 | 288 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5]) 289 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2) 290 | 291 | self.assertEqual(len(obstacles), 2) 292 | 293 | def test_merge_rectangles4(self): 294 | centers = np.array([[-2,-2], [2,2], [0,0], [10,10], [13,13], [11,11], [12,12]]) 295 | widths = np.array([2,2,2,1,1,1,1]).reshape((len(centers),1)) 296 | heights = np.array([2,2,2,1,1,1,1]).reshape((len(centers),1)) 297 | 298 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5]) 299 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2) 300 | 301 | self.assertEqual(len(obstacles), 2) 302 | 303 | 304 | def test_merge_rectangles5(self): 305 | centers = np.array([[-2,-2], [2,2], [0,0], [10,10], [13,13], [12,12]]) 306 | widths = np.array([2,2,2,1,1,1]).reshape((len(centers),1)) 307 | heights = np.array([2,2,2,1,1,1]).reshape((len(centers),1)) 308 | 309 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5]) 310 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2) 311 | 312 | self.assertEqual(len(obstacles), 3) 313 | 314 | 315 | def test_point_in_free_space(self): 316 | centers = np.array([[0,0]]) 317 | widths = np.array([10]).reshape((len(centers),1)) 318 | heights = np.array([10]).reshape((len(centers),1)) 319 | 320 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5]) 321 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2) 322 | 323 | env = Environment(eg.x_range, eg.y_range, list(obstacles.values())) 324 | 325 | self.assertTrue(env.point_is_in_free_space(6,6)) 326 | self.assertTrue(env.point_is_in_free_space(5,5)) 327 | 328 | self.assertFalse(env.point_is_in_free_space(4,4)) 329 | self.assertFalse(env.point_is_in_free_space(0.5,0.5)) 330 | 331 | self.assertTrue(env.point_is_in_free_space(-4,-4)) 332 | self.assertTrue(env.point_is_in_free_space(-5,-5)) 333 | self.assertTrue(env.point_is_in_free_space(-6,-6)) 334 | 335 | 336 | def test_segment_in_free_space(self): 337 | centers = np.array([[0,0]]) 338 | widths = np.array([10]).reshape((len(centers),1)) 339 | heights = np.array([10]).reshape((len(centers),1)) 340 | 341 | eg = EnvironmentGenerator([0, 50], [0, 50], [1,5], [1, 5]) 342 | obstacles = eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2) 343 | 344 | env = Environment(eg.x_range, eg.y_range, list(obstacles.values())) 345 | 346 | self.assertFalse(env.segment_is_in_free_space(-5, 1, 5, 1)) 347 | self.assertTrue(env.segment_is_in_free_space(-5,-4, 5, -4)) 348 | 349 | 350 | def test_save_read(self): 351 | centers = np.array([[4,4], [10,10]]) 352 | widths = np.array([1,1]).reshape((len(centers),1)) 353 | heights = np.array([3,3]).reshape((len(centers),1)) 354 | 355 | eg = EnvironmentGenerator([0, 100], [0, 100], [1,5], [1, 5]) 356 | obstacles = 
eg.merge_rectangles_into_obstacles(centers, widths, heights, epsilon=0.2) 357 | 358 | env = Environment(eg.x_range, eg.y_range, list(obstacles.values())) 359 | 360 | centers2 = np.array([[5,5], [10,10]]) 361 | widths2 = np.array([1,1]).reshape((len(centers2),1)) 362 | heights2 = np.array([2,2]).reshape((len(centers2),1)) 363 | 364 | eg2 = EnvironmentGenerator([0, 100], [0, 100], [1,5], [1, 5]) 365 | obstacles2 = eg2.merge_rectangles_into_obstacles(centers2, widths2, heights2, epsilon=0.2) 366 | 367 | env2 = Environment(eg2.x_range, eg2.y_range, list(obstacles2.values())) 368 | 369 | ec = EnvironmentCollection() 370 | ec.x_range = [0, 100] 371 | ec.y_range = [0, 100] 372 | ec.width_range = [1,5] 373 | ec.height_range = [1,5] 374 | ec.num_environments = 0 375 | ec.map_collection = {0: env, 1: env2} 376 | 377 | ec.save('./temp.pkl') 378 | 379 | ec2 = EnvironmentCollection() 380 | ec2.read('./temp.pkl') 381 | 382 | self.assertEqual(ec2.x_range, ec.x_range) 383 | self.assertEqual(ec2.y_range, ec.y_range) 384 | self.assertEqual(ec2.map_collection, ec.map_collection) 385 | 386 | os.remove('./temp.pkl') 387 | 388 | 389 | if __name__ == '__main__': 390 | unittest.main() 391 | -------------------------------------------------------------------------------- /gym_extensions/continuous/mujoco/__init__.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import os 3 | import gym.envs.mujoco 4 | 5 | custom_envs = { 6 | # Pusher modifications 7 | "PusherMovingGoal-v0": 8 | dict(path='gym_extensions.continuous.mujoco.modified_arm:PusherMovingGoalEnv', 9 | max_episode_steps=100, 10 | reward_threshold=0.0, 11 | kwargs= dict()), 12 | # Pusher modifications 13 | "PusherLeftSide-v0": 14 | dict(path='gym_extensions.continuous.mujoco.modified_arm:PusherLeftSide', 15 | max_episode_steps=100, 16 | reward_threshold=0.0, 17 | kwargs= dict()), 18 | "PusherFullRange-v0": 19 | dict(path='gym_extensions.continuous.mujoco.modified_arm:PusherFullRange', 20 | max_episode_steps=100, 21 | reward_threshold=0.0, 22 | kwargs= dict()), 23 | # Striker 24 | "StrikerMovingStart-v0": 25 | dict(path='gym_extensions.continuous.mujoco.modified_arm:StrikerMovingStartStateEnv', 26 | max_episode_steps=100, 27 | reward_threshold=0.0, 28 | kwargs= dict()), 29 | 30 | # modified gravity - Hopper 31 | "AntGravityMars-v0" : 32 | dict(path='gym_extensions.continuous.mujoco.modified_ant:AntGravityEnv', 33 | max_episode_steps=1000, 34 | reward_threshold=3800.0, 35 | kwargs= dict(gravity=-3.711)), 36 | "AntGravityHalf-v0" : 37 | dict(path='gym_extensions.continuous.mujoco.modified_ant:AntGravityEnv', 38 | max_episode_steps=1000, 39 | reward_threshold=3800.0, 40 | kwargs= dict(gravity=-4.905)), 41 | "AntGravityOneAndHalf-v0" : 42 | dict(path='gym_extensions.continuous.mujoco.modified_ant:AntGravityEnv', 43 | max_episode_steps=1000, 44 | reward_threshold=3800.0, 45 | kwargs= dict(gravity=-14.715)), 46 | 47 | "HopperGravityHalf-v0" : 48 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperGravityEnv', 49 | max_episode_steps=1000, 50 | reward_threshold=3800.0, 51 | kwargs= dict(gravity=-4.905)), 52 | "HopperGravityThreeQuarters-v0" : 53 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperGravityEnv', 54 | max_episode_steps=1000, 55 | reward_threshold=3800.0, 56 | kwargs= dict(gravity=-7.3575)), 57 | "HopperGravityOneAndHalf-v0" : 58 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperGravityEnv', 59 | max_episode_steps=1000, 60 | reward_threshold=3800.0, 61 
| kwargs= dict(gravity=-14.715)), 62 | "HopperGravityOneAndQuarter-v0" : 63 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperGravityEnv', 64 | max_episode_steps=1000, 65 | reward_threshold=3800.0, 66 | kwargs= dict(gravity=-12.2625)), 67 | 68 | "Walker2dGravityHalf-v0" : 69 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dGravityEnv', 70 | max_episode_steps=1000, 71 | kwargs= dict(gravity=-4.905)), 72 | "Walker2dGravityThreeQuarters-v0" : 73 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dGravityEnv', 74 | max_episode_steps=1000, 75 | kwargs= dict(gravity=-7.3575)), 76 | "Walker2dGravityOneAndHalf-v0" : 77 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dGravityEnv', 78 | max_episode_steps=1000, 79 | kwargs= dict(gravity=-14.715)), 80 | "Walker2dGravityOneAndQuarter-v0" : 81 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dGravityEnv', 82 | max_episode_steps=1000, 83 | kwargs= dict(gravity=-12.2625)), 84 | 85 | "HalfCheetahGravityHalf-v0" : 86 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahGravityEnv', 87 | max_episode_steps=1000, 88 | reward_threshold=4800.0, 89 | kwargs= dict(gravity=-4.905)), 90 | "HalfCheetahGravityThreeQuarters-v0" : 91 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahGravityEnv', 92 | max_episode_steps=1000, 93 | reward_threshold=4800.0, 94 | kwargs= dict(gravity=-7.3575)), 95 | "HalfCheetahGravityOneAndHalf-v0" : 96 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahGravityEnv', 97 | max_episode_steps=1000, 98 | reward_threshold=4800.0, 99 | kwargs= dict(gravity=-14.715)), 100 | "HalfCheetahGravityOneAndQuarter-v0" : 101 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahGravityEnv', 102 | max_episode_steps=1000, 103 | reward_threshold=4800.0, 104 | kwargs= dict(gravity=-12.2625)), 105 | 106 | "HumanoidGravityHalf-v0" : 107 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidGravityEnv', 108 | max_episode_steps=1000, 109 | kwargs= dict(gravity=-4.905)), 110 | "HumanoidGravityThreeQuarters-v0" : 111 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidGravityEnv', 112 | max_episode_steps=1000, 113 | kwargs= dict(gravity=-7.3575)), 114 | "HumanoidGravityOneAndHalf-v0" : 115 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidGravityEnv', 116 | max_episode_steps=1000, 117 | kwargs= dict(gravity=-14.715)), 118 | "HumanoidGravityOneAndQuarter-v0" : 119 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidGravityEnv', 120 | max_episode_steps=1000, 121 | kwargs= dict(gravity=-12.2625)), 122 | 123 | 124 | 125 | ### Environment with walls 126 | "AntMaze-v0" : 127 | dict(path='gym_extensions.continuous.mujoco.modified_ant:AntMaze', 128 | max_episode_steps=1000, 129 | reward_threshold=3800.0, 130 | kwargs= dict()), 131 | "HopperStairs-v0" : 132 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperStairs', 133 | max_episode_steps=1000, 134 | reward_threshold=3800.0, 135 | kwargs= dict()), 136 | "HopperSimpleWall-v0" : 137 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperSimpleWallEnv', 138 | max_episode_steps=1000, 139 | reward_threshold=3800.0, 140 | kwargs= dict()), 141 | 142 | "HopperWithSensor-v0" : 143 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperWithSensorEnv', 144 | max_episode_steps=1000, 145 | 
reward_threshold=3800.0, 146 | kwargs= dict(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/hopper.xml")), 147 | "Walker2dWall-v0" : 148 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dWallEnv', 149 | max_episode_steps=1000, 150 | kwargs= dict()), 151 | "Walker2dWithSensor-v0" : 152 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dWithSensorEnv', 153 | max_episode_steps=1000, 154 | kwargs= dict(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/walker2d.xml")), 155 | "HalfCheetahWall-v0" : 156 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahWallEnv', 157 | max_episode_steps=1000, 158 | reward_threshold=4800.0, 159 | kwargs= dict()), 160 | "HalfCheetahWithSensor-v0" : 161 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahWithSensorEnv', 162 | max_episode_steps=1000, 163 | reward_threshold=4800.0, 164 | kwargs= dict(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/half_cheetah.xml")), 165 | "HumanoidWall-v0" : 166 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidWallEnv', 167 | max_episode_steps=1000, 168 | kwargs= dict()), 169 | "HumanoidWithSensor-v0" : 170 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidWithSensorEnv', 171 | max_episode_steps=1000, 172 | kwargs= dict(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoid.xml")), 173 | "HumanoidStandupWithSensor-v0" : 174 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidStandupWithSensorEnv', 175 | max_episode_steps=1000, 176 | kwargs= dict(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoidstandup.xml")), 177 | "HumanoidStandupAndRunWall-v0" : 178 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidStandupAndRunWallEnv', 179 | max_episode_steps=1000, 180 | kwargs= dict()), 181 | "HumanoidStandupAndRunWithSensor-v0" : 182 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidStandupAndRunEnvWithSensor', 183 | max_episode_steps=1000, 184 | kwargs= dict(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoidstandup.xml")), 185 | "HumanoidStandupAndRun-v0" : 186 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidStandupAndRunEnv', 187 | max_episode_steps=1000, 188 | kwargs= dict()), 189 | 190 | # Modified body parts - Hopper 191 | "HopperBigTorso-v0" : 192 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv', 193 | max_episode_steps=1000, 194 | reward_threshold=3800.0, 195 | kwargs= dict(body_parts=["torso_geom"], size_scale=1.25)), 196 | "HopperBigThigh-v0" : 197 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv', 198 | max_episode_steps=1000, 199 | reward_threshold=3800.0, 200 | kwargs= dict(body_parts=["thigh_geom"], size_scale=1.25)), 201 | "HopperBigLeg-v0" : 202 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv', 203 | max_episode_steps=1000, 204 | reward_threshold=3800.0, 205 | kwargs= dict(body_parts=["leg_geom"], size_scale=1.25)), 206 | "HopperBigFoot-v0" : 207 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv', 208 | max_episode_steps=1000, 209 | reward_threshold=3800.0, 210 | kwargs= dict(body_parts=["foot_geom"], size_scale=1.25)), 211 | "HopperSmallTorso-v0" : 212 | 
dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv', 213 | max_episode_steps=1000, 214 | reward_threshold=3800.0, 215 | kwargs= dict(body_parts=["torso_geom"], size_scale=.75)), 216 | "HopperSmallThigh-v0" : 217 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv', 218 | max_episode_steps=1000, 219 | reward_threshold=3800.0, 220 | kwargs= dict(body_parts=["thigh_geom"], size_scale=.75)), 221 | "HopperSmallLeg-v0" : 222 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv', 223 | max_episode_steps=1000, 224 | reward_threshold=3800.0, 225 | kwargs= dict(body_parts=["leg_geom"], size_scale=.75)), 226 | "HopperSmallFoot-v0" : 227 | dict(path='gym_extensions.continuous.mujoco.modified_hopper:HopperModifiedBodyPartSizeEnv', 228 | max_episode_steps=1000, 229 | reward_threshold=3800.0, 230 | kwargs= dict(body_parts=["foot_geom"], size_scale=.75)), 231 | 232 | # Modified body parts - Walker 233 | "Walker2dBigTorso-v0" : 234 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv', 235 | max_episode_steps=1000, 236 | kwargs= dict(body_parts=["torso_geom"], size_scale=1.25)), 237 | "Walker2dBigThigh-v0" : 238 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv', 239 | max_episode_steps=1000, 240 | kwargs= dict(body_parts=["thigh_geom", "thigh_left_geom"], size_scale=1.25)), 241 | "Walker2dBigLeg-v0" : 242 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv', 243 | max_episode_steps=1000, 244 | kwargs= dict(body_parts=["leg_geom", "leg_left_geom"], size_scale=1.25)), 245 | "Walker2dBigFoot-v0" : 246 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv', 247 | max_episode_steps=1000, 248 | kwargs= dict(body_parts=["foot_geom", "foot_left_geom"], size_scale=1.25)), 249 | "Walker2dSmallTorso-v0" : 250 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv', 251 | max_episode_steps=1000, 252 | kwargs= dict(body_parts=["torso_geom"], size_scale=.75)), 253 | "Walker2dSmallThigh-v0" : 254 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv', 255 | max_episode_steps=1000, 256 | kwargs= dict(body_parts=["thigh_geom", "thigh_left_geom"], size_scale=.75)), 257 | "Walker2dSmallLeg-v0" : 258 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv', 259 | max_episode_steps=1000, 260 | kwargs= dict(body_parts=["leg_geom", "leg_left_geom"], size_scale=.75)), 261 | "Walker2dSmallFoot-v0" : 262 | dict(path='gym_extensions.continuous.mujoco.modified_walker2d:Walker2dModifiedBodyPartSizeEnv', 263 | max_episode_steps=1000, 264 | kwargs= dict(body_parts=["foot_geom", "foot_left_geom"], size_scale=.75)), 265 | 266 | # Modified body parts - HalfCheetah 267 | "HalfCheetahBigTorso-v0" : 268 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv', 269 | max_episode_steps=1000, 270 | reward_threshold=4800.0, 271 | kwargs= dict(body_parts=["torso"], size_scale=1.25)), 272 | "HalfCheetahBigThigh-v0" : 273 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv', 274 | max_episode_steps=1000, 275 | reward_threshold=4800.0, 276 | kwargs= dict(body_parts=["fthigh", "bthigh"], size_scale=1.25)), 277 | "HalfCheetahBigLeg-v0" : 278 | 
dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv', 279 | max_episode_steps=1000, 280 | reward_threshold=4800.0, 281 | kwargs= dict(body_parts=["fshin", "bshin"], size_scale=1.25)), 282 | "HalfCheetahBigFoot-v0" : 283 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv', 284 | max_episode_steps=1000, 285 | reward_threshold=4800.0, 286 | kwargs= dict(body_parts=["ffoot", "bfoot"], size_scale=1.25)), 287 | "HalfCheetahSmallTorso-v0" : 288 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv', 289 | max_episode_steps=1000, 290 | reward_threshold=4800.0, 291 | kwargs= dict(body_parts=["torso"], size_scale=.75)), 292 | "HalfCheetahSmallThigh-v0" : 293 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv', 294 | max_episode_steps=1000, 295 | reward_threshold=4800.0, 296 | kwargs= dict(body_parts=["fthigh", "bthigh"], size_scale=.75)), 297 | "HalfCheetahSmallLeg-v0" : 298 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv', 299 | max_episode_steps=1000, 300 | reward_threshold=4800.0, 301 | kwargs= dict(body_parts=["fshin", "bshin"], size_scale=.75)), 302 | "HalfCheetahSmallFoot-v0" : 303 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv', 304 | max_episode_steps=1000, 305 | reward_threshold=4800.0, 306 | kwargs= dict(body_parts=["ffoot", "bfoot"], size_scale=.75)), 307 | "HalfCheetahSmallHead-v0" : 308 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv', 309 | max_episode_steps=1000, 310 | reward_threshold=4800.0, 311 | kwargs= dict(body_parts=["head"], size_scale=.75)), 312 | "HalfCheetahBigHead-v0" : 313 | dict(path='gym_extensions.continuous.mujoco.modified_half_cheetah:HalfCheetahModifiedBodyPartSizeEnv', 314 | max_episode_steps=1000, 315 | reward_threshold=4800.0, 316 | kwargs= dict(body_parts=["head"], size_scale=1.25)), 317 | 318 | 319 | # Modified body parts - Humanoid 320 | "HumanoidBigTorso-v0" : 321 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 322 | max_episode_steps=1000, 323 | kwargs= dict(body_parts=["torso1", "uwaist", "lwaist"], size_scale=1.25)), 324 | "HumanoidBigThigh-v0" : 325 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 326 | max_episode_steps=1000, 327 | kwargs= dict(body_parts=["right_thigh1", "left_thigh1", "butt"], size_scale=1.25)), 328 | "HumanoidBigLeg-v0" : 329 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 330 | max_episode_steps=1000, 331 | kwargs= dict(body_parts=["right_shin1", "left_shin1"], size_scale=1.25)), 332 | "HumanoidBigFoot-v0" : 333 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 334 | max_episode_steps=1000, 335 | kwargs= dict(body_parts=["left_foot", "right_foot"], size_scale=1.25)), 336 | "HumanoidSmallTorso-v0" : 337 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 338 | max_episode_steps=1000, 339 | kwargs= dict(body_parts=["torso1", "uwaist", "lwaist"], size_scale=.75)), 340 | "HumanoidSmallThigh-v0" : 341 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 342 | max_episode_steps=1000, 343 | 
kwargs= dict(body_parts=["right_thigh1", "left_thigh1", "butt"], size_scale=.75)), 344 | "HumanoidSmallLeg-v0" : 345 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 346 | max_episode_steps=1000, 347 | kwargs= dict(body_parts=["right_shin1", "left_shin1"], size_scale=.75)), 348 | "HumanoidSmallFoot-v0" : 349 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 350 | max_episode_steps=1000, 351 | kwargs= dict(body_parts=["left_foot", "right_foot"], size_scale=.75)), 352 | "HumanoidSmallHead-v0" : 353 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 354 | max_episode_steps=1000, 355 | kwargs= dict(body_parts=["head"], size_scale=.75)), 356 | "HumanoidBigHead-v0" : 357 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 358 | max_episode_steps=1000, 359 | kwargs= dict(body_parts=["head"], size_scale=1.25)), 360 | "HumanoidSmallArm-v0" : 361 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 362 | max_episode_steps=1000, 363 | kwargs= dict(body_parts=["right_uarm1", "right_larm", "left_uarm1", "left_larm"], size_scale=.75)), 364 | "HumanoidBigArm-v0" : 365 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 366 | max_episode_steps=1000, 367 | kwargs= dict(body_parts=["right_uarm1", "right_larm", "left_uarm1", "left_larm"], size_scale=1.25)), 368 | "HumanoidSmallHand-v0" : 369 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 370 | max_episode_steps=1000, 371 | kwargs= dict(body_parts=["left_hand", "right_hand"], size_scale=.75)), 372 | "HumanoidBigHand-v0" : 373 | dict(path='gym_extensions.continuous.mujoco.modified_humanoid:HumanoidModifiedBodyPartSizeEnv', 374 | max_episode_steps=1000, 375 | kwargs= dict(body_parts=["left_hand", "right_hand"], size_scale=1.25)), 376 | } 377 | 378 | def register_custom_envs(): 379 | for key, value in custom_envs.items(): 380 | arg_dict = dict(id=key, 381 | entry_point=value["path"], 382 | max_episode_steps=value["max_episode_steps"], 383 | kwargs=value["kwargs"]) 384 | 385 | if "reward_threshold" in value: 386 | arg_dict["reward_threshold"] = value["reward_threshold"] 387 | 388 | gym.envs.register(**arg_dict) 389 | 390 | register_custom_envs() 391 | -------------------------------------------------------------------------------- /gym_extensions/continuous/mujoco/gravity_envs.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import tempfile 3 | import xml.etree.ElementTree as ET 4 | import math 5 | 6 | import numpy as np 7 | import gym 8 | import random 9 | import os 10 | from gym import utils 11 | from gym.envs.mujoco import mujoco_env 12 | import mujoco_py 13 | 14 | def GravityEnvFactory(class_type): 15 | """class_type should be an OpenAI gym type""" 16 | 17 | class GravityEnv(class_type, utils.EzPickle): 18 | """ 19 | Allows the gravity to be changed by the 20 | """ 21 | def __init__( 22 | self, 23 | model_path, 24 | gravity=-9.81, 25 | *args, 26 | **kwargs): 27 | class_type.__init__(self, model_path=model_path) 28 | utils.EzPickle.__init__(self) 29 | 30 | # make sure we're using a proper OpenAI gym Mujoco Env 31 | assert isinstance(self, mujoco_env.MujocoEnv) 32 | 33 | self.model.opt.gravity = (mujoco_py.mjtypes.c_double * 3)(*[0., 0., gravity]) 34 | 
self.model._compute_subtree() 35 | self.model.forward() 36 | 37 | return GravityEnv 38 | -------------------------------------------------------------------------------- /gym_extensions/continuous/mujoco/modified_ant.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | import os.path as osp 5 | from gym_extensions.continuous.mujoco.wall_envs import MazeFactory 6 | from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory 7 | from gym_extensions.continuous.mujoco.perturbed_bodypart_env import ModifiedSizeEnvFactory 8 | 9 | from gym.envs.mujoco.ant import AntEnv 10 | 11 | import os 12 | import gym 13 | 14 | 15 | AntGravityEnv = lambda *args, **kwargs : GravityEnvFactory(ModifiedAntEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml", *args, **kwargs) 16 | 17 | 18 | AntMaze = lambda *args, **kwargs : MazeFactory(ModifiedAntEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml", ori_ind=0, *args, **kwargs) 19 | 20 | 21 | 22 | 23 | class ModifiedAntEnv(AntEnv, utils.EzPickle): 24 | """ 25 | Simply allows changing of XML file, probably not necessary if we pull request the xml name as a kwarg in openai gym 26 | """ 27 | def __init__(self, **kwargs): 28 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 4) 29 | utils.EzPickle.__init__(self) 30 | -------------------------------------------------------------------------------- /gym_extensions/continuous/mujoco/modified_arm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | import os.path as osp 5 | from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory 6 | from gym.envs.mujoco.pusher import PusherEnv 7 | from gym.envs.mujoco.striker import StrikerEnv 8 | 9 | import os 10 | import gym 11 | import random 12 | import six 13 | 14 | class PusherFullRange(PusherEnv, utils.EzPickle): 15 | """ 16 | Simply allows changing of XML file, probably not necessary if we pull request the xml name as a kwarg in openai gym 17 | """ 18 | def __init__(self, model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/pusher.xml", **kwargs): 19 | mujoco_env.MujocoEnv.__init__(self, model_path, 5) 20 | utils.EzPickle.__init__(self) 21 | 22 | # make sure we're using a proper OpenAI gym Mujoco Env 23 | assert isinstance(self, mujoco_env.MujocoEnv) 24 | 25 | self.model.jnt_range = self.get_and_modify_joint_range('r_shoulder_pan_joint') 26 | self.model._compute_subtree() 27 | self.model.forward() 28 | 29 | def get_and_modify_joint_range(self, body_name, new_array=np.array([-.854, 2.714602])): 30 | idx = self.model.joint_names.index(six.b(body_name)) 31 | temp = np.copy(self.model.jnt_range) 32 | temp[idx] = new_array 33 | return temp 34 | 35 | def reset_model(self): 36 | qpos = self.init_qpos 37 | 38 | goal_pos_map = { 'left' : np.array([-0.05, -1.1230]), 'right' : np.array([0.0, 0.0])} 39 | object_start_pos_map = {'left' : np.array([[-0.3, 0.0], [-1.1, -.8]]), 'right' : np.array([[-0.3, 0.0], [-0.2, 0.2]]) } 40 | position = random.choice(['left', 'right']) 41 | 42 | self.goal_pos = goal_pos_map[position] 43 | object_start_range = object_start_pos_map[position] 44 | 45 | while True: 46 | 47 | self.cylinder_pos = np.concatenate([ 48 | self.np_random.uniform(low=object_start_range[0][0], high=object_start_range[0][1], size=1), 
49 | self.np_random.uniform(low=object_start_range[1][0], high=object_start_range[1][1], size=1)]) 50 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17: 51 | break 52 | 53 | qpos[-4:-2] = self.cylinder_pos 54 | qpos[-2:] = self.goal_pos 55 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005, 56 | high=0.005, size=self.model.nv) 57 | qvel[-4:] = 0 58 | self.set_state(qpos, qvel) 59 | return self._get_obs() 60 | 61 | 62 | class PusherLeftSide(PusherEnv, utils.EzPickle): 63 | """ 64 | Simply allows changing of XML file, probably not necessary if we pull request the xml name as a kwarg in openai gym 65 | """ 66 | def __init__(self, model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/pusher.xml", **kwargs): 67 | mujoco_env.MujocoEnv.__init__(self, model_path, 5) 68 | utils.EzPickle.__init__(self) 69 | 70 | # make sure we're using a proper OpenAI gym Mujoco Env 71 | assert isinstance(self, mujoco_env.MujocoEnv) 72 | 73 | self.model.jnt_range = self.get_and_modify_joint_range('r_shoulder_pan_joint') 74 | self.model._compute_subtree() 75 | self.model.forward() 76 | 77 | def get_and_modify_joint_range(self, body_name, new_array=np.array([-.854, 2.714602])): 78 | idx = self.model.joint_names.index(six.b(body_name)) 79 | temp = np.copy(self.model.jnt_range) 80 | temp[idx] = new_array 81 | return temp 82 | 83 | def reset_model(self): 84 | qpos = self.init_qpos 85 | self.goal_pos = np.array([-0.05, -1.1230]) 86 | 87 | while True: 88 | self.cylinder_pos = np.concatenate([ 89 | self.np_random.uniform(low=-0.3, high=0, size=1), 90 | self.np_random.uniform(low=-1.1, high=-.8, size=1)]) 91 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17: 92 | break 93 | 94 | qpos[-4:-2] = self.cylinder_pos 95 | qpos[-2:] = self.goal_pos 96 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005, 97 | high=0.005, size=self.model.nv) 98 | qvel[-4:] = 0 99 | self.set_state(qpos, qvel) 100 | return self._get_obs() 101 | 102 | class PusherMovingGoalEnv(PusherEnv, utils.EzPickle): 103 | """ 104 | Simply allows changing of XML file, probably not necessary if we pull request the xml name as a kwarg in openai gym 105 | """ 106 | def __init__(self, model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/pusher.xml", **kwargs): 107 | mujoco_env.MujocoEnv.__init__(self, model_path, 5) 108 | utils.EzPickle.__init__(self) 109 | 110 | def reset_model(self): 111 | qpos = self.init_qpos 112 | 113 | self.goal_pos = np.concatenate([ 114 | self.np_random.uniform(low=-0.2, high=0, size=1), 115 | self.np_random.uniform(low=-0.1, high=0.3, size=1)]) 116 | 117 | while True: 118 | self.cylinder_pos = np.concatenate([ 119 | self.np_random.uniform(low=-0.3, high=0, size=1), 120 | self.np_random.uniform(low=-0.2, high=0.2, size=1)]) 121 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17: 122 | break 123 | 124 | qpos[-4:-2] = self.cylinder_pos 125 | qpos[-2:] = self.goal_pos 126 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005, 127 | high=0.005, size=self.model.nv) 128 | qvel[-4:] = 0 129 | self.set_state(qpos, qvel) 130 | return self._get_obs() 131 | 132 | 133 | class StrikerMovingStartStateEnv(StrikerEnv, utils.EzPickle): 134 | """ 135 | Simply allows changing of XML file, probably not necessary if we pull request the xml name as a kwarg in openai gym 136 | """ 137 | def __init__(self, model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/striker.xml", **kwargs): 138 | self._striked = False 139 | self._min_strike_dist = np.inf 140 | self.strike_threshold = 0.1 
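        # These attributes must already exist before MujocoEnv.__init__ runs below:
        # in the gym version this repository targets, that constructor performs an
        # initial _step() to size the observation space, and StrikerEnv._step reads
        # _min_strike_dist, _striked and strike_threshold.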
141 | mujoco_env.MujocoEnv.__init__(self, model_path, 5) 142 | utils.EzPickle.__init__(self) 143 | 144 | def reset_model(self): 145 | self._min_strike_dist = np.inf 146 | self._striked = False 147 | self._strike_pos = None 148 | 149 | qpos = self.init_qpos 150 | 151 | self.ball = np.concatenate([ 152 | self.np_random.uniform(low=0.43, high=0.55, size=1), 153 | self.np_random.uniform(low=-0.05, high=-.3, size=1)]) 154 | 155 | while True: 156 | self.goal = np.concatenate([ 157 | self.np_random.uniform(low=0.15, high=0.7, size=1), 158 | self.np_random.uniform(low=0.1, high=1.0, size=1)]) 159 | if np.linalg.norm(self.ball - self.goal) > 0.17: 160 | break 161 | 162 | qpos[-9:-7] = [self.ball[1], self.ball[0]] 163 | qpos[-7:-5] = self.goal 164 | diff = self.ball - self.goal 165 | angle = -np.arctan(diff[0] / (diff[1] + 1e-8)) 166 | qpos[-1] = angle / 3.14 167 | qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1, 168 | size=self.model.nv) 169 | qvel[7:] = 0 170 | self.set_state(qpos, qvel) 171 | return self._get_obs() 172 | -------------------------------------------------------------------------------- /gym_extensions/continuous/mujoco/modified_half_cheetah.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | import os.path as osp 5 | from gym_extensions.continuous.mujoco.wall_envs import WallEnvFactory 6 | from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory 7 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv 8 | from gym_extensions.continuous.mujoco.perturbed_bodypart_env import ModifiedSizeEnvFactory 9 | 10 | import os 11 | import gym 12 | 13 | HalfCheetahWallEnv = lambda *args, **kwargs : WallEnvFactory(ModifiedHalfCheetahEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/half_cheetah.xml", ori_ind=-1, *args, **kwargs) 14 | 15 | HalfCheetahGravityEnv = lambda *args, **kwargs : GravityEnvFactory(ModifiedHalfCheetahEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/half_cheetah.xml", *args, **kwargs) 16 | 17 | HalfCheetahModifiedBodyPartSizeEnv = lambda *args, **kwargs : ModifiedSizeEnvFactory(ModifiedHalfCheetahEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/half_cheetah.xml", *args, **kwargs) 18 | 19 | 20 | class ModifiedHalfCheetahEnv(HalfCheetahEnv, utils.EzPickle): 21 | """ 22 | Simply allows changing of XML file, probably not necessary if we pull request the xml name as a kwarg in openai gym 23 | """ 24 | def __init__(self, **kwargs): 25 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 5) 26 | utils.EzPickle.__init__(self) 27 | 28 | class HalfCheetahWithSensorEnv(HalfCheetahEnv, utils.EzPickle): 29 | """ 30 | Adds empty sensor readouts, this is to be used when transfering to WallEnvs where we get sensor readouts with distances to the wall 31 | """ 32 | 33 | def __init__(self, n_bins=10, **kwargs): 34 | self.n_bins = n_bins 35 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 5) 36 | utils.EzPickle.__init__(self) 37 | 38 | 39 | def _get_obs(self): 40 | obs = np.concatenate([ 41 | HalfCheetahEnv._get_obs(self), 42 | np.zeros(self.n_bins) 43 | # goal_readings 44 | ]) 45 | return obs 46 | -------------------------------------------------------------------------------- /gym_extensions/continuous/mujoco/modified_hopper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import 
utils 3 | from gym.envs.mujoco import mujoco_env 4 | import os.path as osp 5 | from gym_extensions.continuous.mujoco.wall_envs import * 6 | from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory 7 | from gym_extensions.continuous.mujoco.perturbed_bodypart_env import ModifiedSizeEnvFactory 8 | 9 | from gym.envs.mujoco.hopper import HopperEnv 10 | 11 | import os 12 | import gym 13 | 14 | HopperSimpleWallEnv = lambda *args, **kwargs : SimpleWallEnvFactory(ModifiedHopperEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/hopper.xml", ori_ind=-1, *args, **kwargs) 15 | 16 | HopperStairs = lambda *args, **kwargs : StairsFactory(ModifiedHopperEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/hopper.xml", ori_ind=-1, *args, **kwargs) 17 | 18 | HopperGravityEnv = lambda *args, **kwargs : GravityEnvFactory(ModifiedHopperEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/hopper.xml", *args, **kwargs) 19 | 20 | HopperModifiedBodyPartSizeEnv = lambda *args, **kwargs : ModifiedSizeEnvFactory(ModifiedHopperEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/hopper.xml", *args, **kwargs) 21 | 22 | 23 | class ModifiedHopperEnv(HopperEnv, utils.EzPickle): 24 | """ 25 | Simply allows changing of XML file, probably not necessary if we pull request the xml name as a kwarg in openai gym 26 | """ 27 | def __init__(self, **kwargs): 28 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 4) 29 | utils.EzPickle.__init__(self) 30 | 31 | class HopperWithSensorEnv(HopperEnv, utils.EzPickle): 32 | """ 33 | Adds empty sensor readouts, this is to be used when transfering to WallEnvs where we get sensor readouts with distances to the wall 34 | """ 35 | 36 | def __init__(self, n_bins=10, **kwargs): 37 | self.n_bins = n_bins 38 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 4) 39 | utils.EzPickle.__init__(self) 40 | 41 | 42 | def _get_obs(self): 43 | obs = np.concatenate([ 44 | HopperEnv._get_obs(self), 45 | np.zeros(self.n_bins) 46 | # goal_readings 47 | ]) 48 | return obs 49 | -------------------------------------------------------------------------------- /gym_extensions/continuous/mujoco/modified_humanoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | import os.path as osp 5 | from gym_extensions.continuous.mujoco.wall_envs import WallEnvFactory 6 | from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory 7 | from gym.envs.mujoco.humanoid import HumanoidEnv, mass_center 8 | from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv 9 | from gym_extensions.continuous.mujoco.perturbed_bodypart_env import ModifiedSizeEnvFactory 10 | 11 | import os 12 | import gym 13 | 14 | HumanoidWallEnv = lambda *args, **kwargs : WallEnvFactory(ModifiedHumanoidEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoid.xml", ori_ind=-1, *args, **kwargs) 15 | 16 | HumanoidStandupAndRunWallEnv = lambda *args, **kwargs : WallEnvFactory(HumanoidStandupAndRunEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoidstandup.xml", ori_ind=-1, *args, **kwargs) 17 | 18 | HumanoidGravityEnv = lambda *args, **kwargs : GravityEnvFactory(ModifiedHumanoidEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoid.xml", *args, **kwargs) 19 | 20 | HumanoidModifiedBodyPartSizeEnv = lambda *args, **kwargs : 
ModifiedSizeEnvFactory(ModifiedHumanoidEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoid.xml", *args, **kwargs) 21 | 22 | 23 | class ModifiedHumanoidEnv(HumanoidEnv, utils.EzPickle): 24 | """ 25 | Simply allows changing of XML file, probably not necessary if we pull request the xml name as a kwarg in openai gym 26 | """ 27 | def __init__(self, **kwargs): 28 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 5) 29 | utils.EzPickle.__init__(self) 30 | 31 | class HumanoidWithSensorEnv(HumanoidEnv, utils.EzPickle): 32 | """ 33 | Adds empty sensor readouts, this is to be used when transfering to WallEnvs where we get sensor readouts with distances to the wall 34 | """ 35 | 36 | def __init__(self, n_bins=10, **kwargs): 37 | self.n_bins = n_bins 38 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 5) 39 | utils.EzPickle.__init__(self) 40 | 41 | 42 | def _get_obs(self): 43 | obs = np.concatenate([ 44 | HumanoidEnv._get_obs(self), 45 | np.zeros(self.n_bins) 46 | # goal_readings 47 | ]) 48 | return obs 49 | 50 | 51 | class HumanoidStandupWithSensorEnv(HumanoidStandupEnv, utils.EzPickle): 52 | """ 53 | Adds empty sensor readouts, this is to be used when transfering to WallEnvs where we get sensor readouts with distances to the wall 54 | """ 55 | 56 | def __init__(self, n_bins=10, **kwargs): 57 | self.n_bins = n_bins 58 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 5) 59 | utils.EzPickle.__init__(self) 60 | 61 | 62 | def _get_obs(self): 63 | obs = np.concatenate([ 64 | HumanoidStandupEnv._get_obs(self), 65 | np.zeros(self.n_bins) 66 | # goal_readings 67 | ]) 68 | return obs 69 | 70 | class HumanoidStandupAndRunEnv(HumanoidEnv, utils.EzPickle): 71 | """ 72 | Environment that rewards standing up and the running, this is the combination of Humanoid-v1 and HumanoidStandup-v1 73 | """ 74 | def __init__(self, model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/humanoidstandup.xml", **kwargs): 75 | mujoco_env.MujocoEnv.__init__(self, model_path, 5) 76 | utils.EzPickle.__init__(self) 77 | 78 | def _step(self, a): 79 | pos_before = mass_center(self.model) 80 | self.do_simulation(a, self.frame_skip) 81 | pos_after = mass_center(self.model) 82 | 83 | pos_after_standup = self.model.data.qpos[2][0] 84 | 85 | down = bool(( self.model.data.qpos[2] < 1.0) or ( self.model.data.qpos[2] > 2.0)) 86 | 87 | alive_bonus = 5.0 if not down else 1.0 88 | 89 | data = self.model.data 90 | 91 | uph_cost = (pos_after_standup - 0) / self.model.opt.timestep 92 | lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep 93 | 94 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum() 95 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum() 96 | quad_impact_cost = min(quad_impact_cost, 10) 97 | 98 | reward = lin_vel_cost + uph_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus 99 | qpos = self.model.data.qpos 100 | 101 | done = bool(False) 102 | return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost, reward_alive=alive_bonus) 103 | 104 | 105 | class HumanoidStandupAndRunEnvWithSensor(HumanoidStandupAndRunEnv, utils.EzPickle): 106 | """ 107 | Environment that rewards standing up and the running, this is the combination of Humanoid-v1 and HumanoidStandup-v1, also adds empty sensor 108 | readouts to the state space to make compatible with transfer learning for the WallEnv variant 109 | """ 110 | def __init__(self, model_path=os.path.dirname(gym.envs.mujoco.__file__) + 
"/assets/humanoidstandup.xml", n_bins=10, **kwargs): 111 | self.n_bins = n_bins 112 | mujoco_env.MujocoEnv.__init__(self, model_path, 5) 113 | utils.EzPickle.__init__(self) 114 | 115 | def _get_obs(self): 116 | obs = np.concatenate([ 117 | HumanoidStandupAndRunEnv._get_obs(self), 118 | np.zeros(self.n_bins) 119 | # goal_readings 120 | ]) 121 | return obs 122 | -------------------------------------------------------------------------------- /gym_extensions/continuous/mujoco/modified_walker2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | import os.path as osp 5 | from gym_extensions.continuous.mujoco.wall_envs import WallEnvFactory 6 | from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory 7 | from gym.envs.mujoco.walker2d import Walker2dEnv 8 | from gym_extensions.continuous.mujoco.perturbed_bodypart_env import ModifiedSizeEnvFactory 9 | 10 | import os 11 | import gym 12 | 13 | Walker2dWallEnv = lambda *args, **kwargs : WallEnvFactory(ModifiedWalker2dEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/walker2d.xml", ori_ind=-1, *args, **kwargs) 14 | 15 | Walker2dGravityEnv = lambda *args, **kwargs : GravityEnvFactory(ModifiedWalker2dEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/walker2d.xml", *args, **kwargs) 16 | 17 | Walker2dModifiedBodyPartSizeEnv = lambda *args, **kwargs : ModifiedSizeEnvFactory(ModifiedWalker2dEnv)(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/walker2d.xml", *args, **kwargs) 18 | 19 | 20 | class ModifiedWalker2dEnv(Walker2dEnv, utils.EzPickle): 21 | """ 22 | Simply allows changing of XML file, probably not necessary if we pull request the xml name as a kwarg in openai gym 23 | """ 24 | def __init__(self, **kwargs): 25 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 4) 26 | utils.EzPickle.__init__(self) 27 | 28 | class Walker2dWithSensorEnv(Walker2dEnv, utils.EzPickle): 29 | """ 30 | Adds empty sensor readouts, this is to be used when transfering to WallEnvs where we get sensor readouts with distances to the wall 31 | """ 32 | 33 | def __init__(self, n_bins=10, **kwargs): 34 | self.n_bins = n_bins 35 | mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 4) 36 | utils.EzPickle.__init__(self) 37 | 38 | 39 | def _get_obs(self): 40 | obs = np.concatenate([ 41 | Walker2dEnv._get_obs(self), 42 | np.zeros(self.n_bins) 43 | # goal_readings 44 | ]) 45 | return obs 46 | -------------------------------------------------------------------------------- /gym_extensions/continuous/mujoco/perturbed_bodypart_env.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import tempfile 3 | import xml.etree.ElementTree as ET 4 | import math 5 | 6 | import numpy as np 7 | import gym 8 | import random 9 | import os 10 | from gym import utils 11 | from gym.envs.mujoco import mujoco_env 12 | import mujoco_py 13 | import six 14 | 15 | def ModifiedMassEnvFactory(class_type): 16 | """class_type should be an OpenAI gym type""" 17 | 18 | 19 | class ModifiedMassEnv(class_type, utils.EzPickle): 20 | """ 21 | Allows the gravity to be changed by the 22 | """ 23 | def __init__( 24 | self, 25 | model_path, 26 | body_part="torso", 27 | mass_scale=1.0, 28 | *args, 29 | **kwargs): 30 | class_type.__init__(self, model_path=model_path) 31 | utils.EzPickle.__init__(self) 32 | 33 | # make sure we're using a proper OpenAI gym 
Mujoco Env 34 | assert isinstance(self, mujoco_env.MujocoEnv) 35 | 36 | self.model.body_mass = self.get_and_modify_bodymass(body_part, mass_scale) 37 | self.model._compute_subtree() 38 | self.model.forward() 39 | 40 | def get_and_modify_bodymass(self, body_name, scale): 41 | idx = self.model.body_names.index(six.b(body_name)) 42 | temp = np.copy(self.model.body_mass) 43 | temp[idx] *= scale 44 | return temp 45 | 46 | 47 | 48 | return ModifiedMassEnv 49 | 50 | def ModifiedSizeEnvFactory(class_type): 51 | """class_type should be an OpenAI gym type""" 52 | 53 | 54 | 55 | class ModifiedSizeEnv(class_type, utils.EzPickle): 56 | """ 57 | Allows the gravity to be changed by the 58 | """ 59 | 60 | 61 | def __init__( 62 | self, 63 | model_path, 64 | body_parts=["torso"], 65 | size_scale=1.0, 66 | *args, 67 | **kwargs): 68 | 69 | assert isinstance(self, mujoco_env.MujocoEnv) 70 | 71 | # find the body_part we want 72 | tree = ET.parse(model_path) 73 | for body_part in body_parts: 74 | # torso = tree.find(".//body[@name='%s']" % body_part) 75 | 76 | # grab the geoms 77 | geom = tree.find(".//geom[@name='%s']" % body_part) 78 | 79 | sizes = [float(x) for x in geom.attrib["size"].split(" ")] 80 | 81 | # rescale 82 | geom.attrib["size"] = " ".join([str(x * size_scale) for x in sizes ]) # the first one should always be the thing we want. 83 | 84 | # TODO: in the future we want to also be able to make it longer or shorter, but this requires propagation of the fromto attribute 85 | # so like a middle part isn't super long but the other parts connect at the same spot. 86 | 87 | # fromto = [] 88 | # for x in geoms[0].attrib["fromto"].split(" "): 89 | # fromto.append(float(x)) 90 | # fromto = [x*length_scale for x in fromto] 91 | # geoms[0].attrib["fromto"] = str() * length_scale) # the first one should always be the thing we want. 
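            # Only the geom "size" attribute is rescaled above, so the selected body
            # parts get thicker or thinner; capsule endpoints ("fromto") are left
            # untouched, which is why limb lengths stay the same (see the TODO above).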
92 | 93 | # create new xml 94 | _, file_path = tempfile.mkstemp(text=True) 95 | tree.write(file_path) 96 | 97 | # load the modified xml 98 | class_type.__init__(self, model_path=file_path) 99 | utils.EzPickle.__init__(self) 100 | 101 | 102 | # def get_and_modify_bodysize(self, body_name, scale): 103 | # idx = self.model.body_names.index(six.b(body_name)) 104 | # temp = np.copy(self.model.geom_size) 105 | # temp[idx] *= scale 106 | # return temp 107 | 108 | return ModifiedSizeEnv 109 | -------------------------------------------------------------------------------- /gym_extensions/discrete/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/gym_extensions/discrete/__init__.py -------------------------------------------------------------------------------- /gym_extensions/discrete/classic/__init__.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import os 3 | 4 | custom_envs = { 5 | # Contextual environments 6 | "CartPoleContextual-v0": 7 | dict(path='gym_extensions.discrete.classic.cartpole_contextual:CartPoleContextualEnv', 8 | max_episode_steps=500, 9 | kwargs= dict()), 10 | "PendulumContextual-v0": 11 | dict(path='gym_extensions.discrete.classic.pendulum_contextual:PendulumContextualEnv', 12 | max_episode_steps=500, 13 | kwargs= dict()) 14 | } 15 | 16 | def register_custom_envs(): 17 | for key, value in custom_envs.items(): 18 | arg_dict = dict(id=key, 19 | entry_point=value["path"], 20 | max_episode_steps=value["max_episode_steps"], 21 | kwargs=value["kwargs"]) 22 | 23 | if "reward_threshold" in value: 24 | arg_dict["reward_threshold"] = value["reward_threshold"] 25 | 26 | gym.envs.register(**arg_dict) 27 | 28 | register_custom_envs() 29 | -------------------------------------------------------------------------------- /gym_extensions/discrete/classic/cartpole.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classic cart-pole system implemented by Rich Sutton et al. 3 | Copied from https://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c 4 | """ 5 | 6 | import logging 7 | import math 8 | import gym 9 | from gym import spaces 10 | from gym.utils import seeding 11 | import numpy as np 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | def register_custom_cartpole(tag, gravity=9.8, masscart=1.0, masspole=0.1, pole_length=.5, force_mag=10.0): 16 | """ 17 | Tag - What to call your env (e.g. 
CustomCartpoleLongPole-v0, CustomCartpoleLongPole-v1) 18 | gravity - if you want to modify the gravity factor (default 9.8) 19 | masscart - the mass of the cartpole base 20 | masspole - the mass of the pole 21 | length - the length of the pole 22 | force_mag - the magnitude of the exerted action force 23 | """ 24 | return gym.envs.register( 25 | id=tag, 26 | entry_point="envs.transfer.classic.cartpole:CustomizableCartPoleEnv", 27 | max_episode_steps=200, 28 | reward_threshold=195.0, 29 | kwargs= dict(gravity = gravity, masscart = masscart, masspole = masspole, length = pole_length, force_mag = force_mag) 30 | ) 31 | 32 | 33 | class CustomizableCartPoleEnv(gym.Env): 34 | metadata = { 35 | 'render.modes': ['human', 'rgb_array'], 36 | 'video.frames_per_second' : 50 37 | } 38 | 39 | def __init__(self, gravity=9.8, masscart=1.0, masspole=0.1, length = .5, force_mag = 10.0): 40 | self.gravity = gravity 41 | self.masscart = masscart 42 | self.masspole = masspole 43 | self.total_mass = (self.masspole + self.masscart) 44 | self.length = length # actually half the pole's length 45 | self.polemass_length = (self.masspole * self.length) 46 | self.force_mag = force_mag 47 | self.tau = 0.02 # seconds between state updates 48 | 49 | # Angle at which to fail the episode 50 | self.theta_threshold_radians = 12 * 2 * math.pi / 360 51 | self.x_threshold = 2.4 52 | 53 | # Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds 54 | high = np.array([ 55 | self.x_threshold * 2, 56 | np.finfo(np.float32).max, 57 | self.theta_threshold_radians * 2, 58 | np.finfo(np.float32).max]) 59 | 60 | self.action_space = spaces.Discrete(2) 61 | self.observation_space = spaces.Box(-high, high) 62 | 63 | self._seed() 64 | self.viewer = None 65 | self.state = None 66 | 67 | self.steps_beyond_done = None 68 | 69 | def _seed(self, seed=None): 70 | self.np_random, seed = seeding.np_random(seed) 71 | return [seed] 72 | 73 | def _step(self, action): 74 | assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action)) 75 | state = self.state 76 | x, x_dot, theta, theta_dot = state 77 | force = self.force_mag if action==1 else -self.force_mag 78 | costheta = math.cos(theta) 79 | sintheta = math.sin(theta) 80 | temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass 81 | thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass)) 82 | xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass 83 | x = x + self.tau * x_dot 84 | x_dot = x_dot + self.tau * xacc 85 | theta = theta + self.tau * theta_dot 86 | theta_dot = theta_dot + self.tau * thetaacc 87 | self.state = (x,x_dot,theta,theta_dot) 88 | done = x < -self.x_threshold \ 89 | or x > self.x_threshold \ 90 | or theta < -self.theta_threshold_radians \ 91 | or theta > self.theta_threshold_radians 92 | done = bool(done) 93 | 94 | if not done: 95 | reward = 1.0 96 | elif self.steps_beyond_done is None: 97 | # Pole just fell! 98 | self.steps_beyond_done = 0 99 | reward = 1.0 100 | else: 101 | if self.steps_beyond_done == 0: 102 | logger.warning("You are calling 'step()' even though this environment has already returned done = True. 
You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.") 103 | self.steps_beyond_done += 1 104 | reward = 0.0 105 | 106 | return np.array(self.state), reward, done, {} 107 | 108 | def _reset(self): 109 | self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) 110 | self.steps_beyond_done = None 111 | return np.array(self.state) 112 | 113 | def _render(self, mode='human', close=False): 114 | if close: 115 | if self.viewer is not None: 116 | self.viewer.close() 117 | self.viewer = None 118 | return 119 | 120 | screen_width = 600 121 | screen_height = 400 122 | 123 | world_width = self.x_threshold*2 124 | scale = screen_width/world_width 125 | carty = 100 # TOP OF CART 126 | polewidth = 10.0 127 | polelen = scale * 1.0 128 | cartwidth = 50.0 129 | cartheight = 30.0 130 | 131 | if self.viewer is None: 132 | from gym.envs.classic_control import rendering 133 | self.viewer = rendering.Viewer(screen_width, screen_height) 134 | l,r,t,b = -cartwidth/2, cartwidth/2, cartheight/2, -cartheight/2 135 | axleoffset =cartheight/4.0 136 | cart = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)]) 137 | self.carttrans = rendering.Transform() 138 | cart.add_attr(self.carttrans) 139 | self.viewer.add_geom(cart) 140 | l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2 141 | pole = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)]) 142 | pole.set_color(.8,.6,.4) 143 | self.poletrans = rendering.Transform(translation=(0, axleoffset)) 144 | pole.add_attr(self.poletrans) 145 | pole.add_attr(self.carttrans) 146 | self.viewer.add_geom(pole) 147 | self.axle = rendering.make_circle(polewidth/2) 148 | self.axle.add_attr(self.poletrans) 149 | self.axle.add_attr(self.carttrans) 150 | self.axle.set_color(.5,.5,.8) 151 | self.viewer.add_geom(self.axle) 152 | self.track = rendering.Line((0,carty), (screen_width,carty)) 153 | self.track.set_color(0,0,0) 154 | self.viewer.add_geom(self.track) 155 | 156 | if self.state is None: return None 157 | 158 | x = self.state 159 | cartx = x[0]*scale+screen_width/2.0 # MIDDLE OF CART 160 | self.carttrans.set_translation(cartx, carty) 161 | self.poletrans.set_rotation(-x[2]) 162 | 163 | return self.viewer.render(return_rgb_array = mode=='rgb_array') 164 | -------------------------------------------------------------------------------- /gym_extensions/discrete/classic/cartpole_contextual.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classic cart-pole system implemented by Rich Sutton et al. 3 | Copied from https://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c 4 | """ 5 | 6 | import logging 7 | import math 8 | import gym 9 | from gym import spaces 10 | from gym.envs.classic_control.cartpole import CartPoleEnv 11 | from gym.utils import seeding 12 | import numpy as np 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | def register_custom_cartpole(tag, gravity=9.8, masscart=1.0, masspole=0.1, pole_length=.5, force_mag=10.0): 17 | """ 18 | Tag - What to call your env (e.g. 
CustomCartpoleLongPole-v0, CustomCartpoleLongPole-v1) 19 | gravity - if you want to modify the gravity factor (default 9.8) 20 | masscart - the mass of the cartpole base 21 | masspole - the mass of the pole 22 | length - the length of the pole 23 | force_mag - the magnitude of the exerted action force 24 | """ 25 | return gym.envs.register( 26 | id=tag, 27 | entry_point="envs.transfer.classic.cartpole_contextual:CartPoleContextualEnv", 28 | max_episode_steps=200, 29 | reward_threshold=195.0, 30 | kwargs= dict(gravity = gravity, masscart = masscart, masspole = masspole, length = pole_length, force_mag = force_mag) 31 | ) 32 | 33 | class CartPoleContextualEnv(CartPoleEnv): 34 | metadata = { 35 | 'render.modes': ['human', 'rgb_array'], 36 | 'video.frames_per_second' : 50 37 | } 38 | 39 | def __init__(self,gravity=9.8, masscart=1.0, masspole=0.1, length = .5, force_mag = 10.0): 40 | super(CartPoleContextualEnv, self).__init__() 41 | self.context = [masscart, masspole, length, force_mag] 42 | self.gravity = gravity # not including in the context for now 43 | #self.masscart = self.context[0] 44 | self.masspole = self.context[1] 45 | self.length = self.context[2] 46 | self.force_mag = self.context[3] 47 | 48 | # our own responsibility to define the range of the context, since we define it 49 | self.context_high = np.array([ 50 | self.masscart * 2, 51 | self.masspole * 10, 52 | self.length * 2, 53 | self.force_mag * 2]) 54 | 55 | # the params in the given context can't be less or equal to zero! 56 | self.context_low = np.array([ 0.1, 0.1, 0.1, 0.1]) 57 | 58 | 59 | def _step(self, action): 60 | state, reward, done, _ = super(CartPoleContextualEnv, self)._step(action) 61 | return state, reward, done, {'masscart':self.masscart, 'masspole':self.masspole, 'pole_length':self.length, 'force_magnitude':self.force_mag} 62 | 63 | 64 | def change_context(self, context_vector): 65 | self.masscart = context_vector 66 | 67 | 68 | def context_space_info(self): 69 | context_info_dict = {} 70 | 71 | context_info_dict['context_vals'] = [1.0, 0.1, 0.5, 10.0] 72 | context_info_dict['context_high'] = self.context_high.tolist() # to make sure it can be serialized in json files 73 | context_info_dict['context_low' ] = self.context_low.tolist() 74 | context_info_dict['state_dims' ] = 4 75 | context_info_dict['action_dims' ] = 1 76 | context_info_dict['action_space'] = 'discrete' 77 | context_info_dict['state_high' ] = self.observation_space.high.tolist() 78 | context_info_dict['state_low' ] = self.observation_space.low.tolist() 79 | context_info_dict['action_high' ] = 1 80 | context_info_dict['action_low' ] = 0 81 | 82 | return context_info_dict 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /gym_extensions/discrete/classic/pendulum_contextual.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | import gym 4 | from gym import spaces 5 | from gym.envs.classic_control.pendulum import PendulumEnv 6 | from gym.utils import seeding 7 | import numpy as np 8 | #from os import path 9 | logger = logging.getLogger(__name__) 10 | 11 | def register_custom_cartpole(tag, max_speed = 8, max_torque = 2.0): 12 | """ 13 | Tag - What to call your env (e.g. 
CustomCartpoleLongPole-v0, CustomCartpoleLongPole-v1) 14 | gravity - if you want to modify the gravity factor (default 9.8) 15 | masscart - the mass of the cartpole base 16 | masspole - the mass of the pole 17 | length - the length of the pole 18 | force_mag - the magnitude of the exerted action force 19 | """ 20 | return gym.envs.register( 21 | id=tag, 22 | entry_point="envs.transfer.classic.pendulum_contextual:PendulumContextualEnv", 23 | max_episode_steps=200, 24 | reward_threshold=195.0, 25 | kwargs= dict(max_speed = max_speed, max_torque = max_torque) 26 | ) 27 | 28 | class PendulumContextualEnv(PendulumEnv): 29 | def __init__(self,max_speed=8, max_torque=2.0): 30 | super(PendulumContextualEnv, self).__init__() 31 | self.context = [max_speed, max_torque] 32 | self.max_speed = self.context[0] 33 | self.max_torque = self.context[1] 34 | 35 | # our own responsibility to define the range of the context, since we define it 36 | self.context_high = np.array([ 37 | self.max_speed * 10, 38 | self.max_torque * 10]) 39 | 40 | self.context_low = np.array([ 0.1, 0.1]) # the params in the context can't be less or equal to zero! 41 | 42 | 43 | def _step(self, action): 44 | state, reward, done, _ = super(PendulumContextualEnv, self)._step(action) 45 | return state, reward, done, {'max_speed':self.max_speed, 'max_torque':self.max_torque} 46 | 47 | 48 | def change_context(self, context_vector): 49 | self.max_speed = context_vector 50 | 51 | 52 | def context_space_info(self): 53 | context_info_dict = {} 54 | 55 | context_info_dict['context_vals'] = [8.0, 2.0] 56 | context_info_dict['context_high'] = self.context_high.tolist() 57 | context_info_dict['context_low' ] = self.context_low.tolist() 58 | context_info_dict['state_dims' ] = 3 59 | # I need to know what the size of the action vector I need to pass to the transition function 60 | context_info_dict['action_dims' ] = 1 61 | context_info_dict['action_space'] = 'continuous' 62 | context_info_dict['state_high' ] = self.observation_space.high.tolist() 63 | context_info_dict['state_low' ] = self.observation_space.low.tolist() 64 | context_info_dict['action_high' ] = self.action_space.high.tolist() 65 | context_info_dict['action_low' ] = self.action_space.low.tolist() 66 | 67 | return context_info_dict 68 | -------------------------------------------------------------------------------- /gym_extensions/tests/README.md: -------------------------------------------------------------------------------- 1 | # Testing 2 | 3 | This directory contains both unittest and manual testing scripts. Ideally, everything will be unit tested or integration tested, but it's research so understandably there may be some manual test scripts for simplicity and speed. If you see something you don't like, feel free to open an issue or a pull request to add more formal testing! It would be much appreciated!!! 
(And you'd be added to the list of contributors) 4 | -------------------------------------------------------------------------------- /gym_extensions/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/gym_extensions/tests/__init__.py -------------------------------------------------------------------------------- /gym_extensions/tests/test_contextual_envs.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.envs.classic_control.cartpole import CartPoleEnv 3 | from gym.envs.classic_control.pendulum import PendulumEnv 4 | from gym_extensions.discrete import classic 5 | 6 | def test_cartpole_contextual(): 7 | env_id = 'CartPoleContextual-v0' 8 | env = gym.make(env_id) 9 | if isinstance(env.unwrapped, CartPoleEnv): 10 | env.reset() 11 | else: 12 | raise NotImplementedError 13 | 14 | nr_of_items_context_space_info = 10 15 | nr_unwrapped = len(list(env.unwrapped.context_space_info().keys())) 16 | if nr_of_items_context_space_info != nr_unwrapped: 17 | print('context_space_info() function needs to be implemented!') 18 | raise NotImplementedError 19 | 20 | context_vect = [0.01, 0.01, 0.01, 0.01] 21 | # these should change because change_context_function 22 | if context_vect == env.unwrapped.context: 23 | raise AttributeError 24 | 25 | env.unwrapped.change_context(context_vect) 26 | if context_vect != env.unwrapped.context: 27 | raise AttributeError 28 | 29 | 30 | def test_pendulum_contextual(): 31 | env_id = 'PendulumContextual-v0' 32 | env = gym.make(env_id) 33 | if isinstance(env.unwrapped, PendulumEnv): 34 | env.reset() 35 | else: 36 | raise NotImplementedError 37 | 38 | nr_of_items_context_space_info = 10 39 | nr_unwrapped = len(list(env.unwrapped.context_space_info().keys())) 40 | if nr_of_items_context_space_info != nr_unwrapped: 41 | print('context_space_info() function needs to be implemented!') 42 | raise NotImplementedError 43 | 44 | context_vect = [0.01, 0.01] 45 | if context_vect == env.unwrapped.context: 46 | raise AttributeError 47 | 48 | env.unwrapped.change_context(context_vect) 49 | if context_vect != env.unwrapped.context: 50 | raise AttributeError 51 | 52 | 53 | if __name__ == "__main__": 54 | test_cartpole_contextual() 55 | test_pendulum_contextual() 56 | -------------------------------------------------------------------------------- /gym_extensions/tests/test_import.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | def test_import_classic(): 4 | """ 5 | Test import of official envs as seen in the README 6 | """ 7 | from gym_extensions.discrete import classic 8 | envs = [x for x in classic.custom_envs.keys()] 9 | [gym.make(x) for x in envs] 10 | 11 | def test_import_box2d(): 12 | """ 13 | Test import of official envs as seen in the README 14 | """ 15 | from gym_extensions.continuous import box2d 16 | envs = [x for x in box2d.custom_envs.keys()] 17 | [gym.make(x) for x in envs] 18 | 19 | def test_import_gym_navigation_2d(): 20 | """ 21 | Test import of official envs as seen in the README 22 | """ 23 | from gym_extensions.continuous import gym_navigation_2d 24 | envs = [x for x in gym_navigation_2d.custom_envs.keys()] 25 | [gym.make(x) for x in envs] 26 | 27 | if __name__ == '__main__': 28 | #print(test_import_classic()) 29 | print(test_import_box2d()) 30 | #print(test_import_gym_navigation_2d()) 31 | 
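The import-based registration pattern exercised by these tests is also how the environments are meant to be used in practice: importing a subpackage runs its `register_custom_envs()` at import time, after which any key of its `custom_envs` dict can be passed to `gym.make`. A minimal sketch (assuming MuJoCo is installed and the legacy 4-tuple `step` API this repository targets; `"HopperGravityHalf-v0"` is one of the IDs registered in `gym_extensions/continuous/mujoco/__init__.py`):

```python
import gym

# Importing the subpackage triggers register_custom_envs() at import time.
from gym_extensions.continuous import mujoco

# Any key of mujoco.custom_envs works here; this one halves gravity.
env = gym.make("HopperGravityHalf-v0")

obs = env.reset()
for _ in range(100):
    # Legacy gym API: step returns (observation, reward, done, info).
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
```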
-------------------------------------------------------------------------------- /gym_extensions/tests/test_mujoco_import.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | def test_import_mujoco(): 4 | """ 5 | Test import of official envs as seen in the README 6 | """ 7 | from gym_extensions.continuous import mujoco 8 | envs = [x for x in mujoco.custom_envs.keys()] 9 | [gym.make(x) for x in envs] 10 | return True 11 | 12 | if __name__ == '__main__': 13 | print(test_import_mujoco()) 14 | -------------------------------------------------------------------------------- /gym_extensions/tests/test_wrappers.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym_extensions.wrappers.observation_transform_wrapper import ObservationTransformWrapper 3 | from gym_extensions.wrappers.transformers import (SimpleNormalizePixelIntensitiesTransformer, 4 | ResizeImageTransformer, AppendPrevTimeStepTransformer) 5 | import time 6 | 7 | def test_image_resize(): 8 | gymenv = gym.make("Breakout-v4") 9 | transformers = [SimpleNormalizePixelIntensitiesTransformer(), ResizeImageTransformer(fraction_of_current_size=.35)] 10 | transformed_env = ObservationTransformWrapper(gymenv, transformers) 11 | return transformed_env 12 | 13 | def test_append_prev_timestep(): 14 | gymenv = gym.make("Hopper-v1") 15 | transformers = [AppendPrevTimeStepTransformer()] 16 | transformed_env = ObservationTransformWrapper(gymenv, transformers) 17 | return transformed_env 18 | 19 | for env in [test_image_resize(), test_append_prev_timestep()]: 20 | env.reset() 21 | print(env.observation_space) 22 | for t in range(100): 23 | env.render() 24 | 25 | start = time.time() 26 | action = env.action_space.sample() 27 | end = time.time() 28 | 29 | 30 | start = time.time() 31 | observation, reward, done, info = env.step(action) 32 | print(observation) 33 | print(observation.shape) 34 | end = time.time() 35 | 36 | if done: 37 | print("Episode finished after {} timesteps".format(t+1)) 38 | break 39 | -------------------------------------------------------------------------------- /gym_extensions/wrappers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/gym_extensions/wrappers/__init__.py -------------------------------------------------------------------------------- /gym_extensions/wrappers/normalized_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym import spaces 4 | from .proxy_env import ProxyEnv 5 | from.serializable import Serializable 6 | from gym.spaces.box import Box 7 | 8 | 9 | class NormalizedEnv(ProxyEnv, Serializable): 10 | def __init__( 11 | self, 12 | env, 13 | normalize_obs=False, 14 | obs_alpha=0.001, 15 | ): 16 | Serializable.quick_init(self, locals()) 17 | ProxyEnv.__init__(self, env) 18 | self._normalize_obs = normalize_obs 19 | self._obs_alpha = obs_alpha 20 | self._obs_mean = np.zeros(np.prod(env.observation_space.shape)) 21 | self._obs_var = np.ones(np.prod(env.observation_space.shape)) 22 | 23 | 24 | def _update_obs_estimate(self, obs): 25 | flat_obs = self._wrapped_env.observation_space.flatten(obs) 26 | self._obs_mean = (1 - self._obs_alpha) * self._obs_mean + self._obs_alpha * flat_obs 27 | self._obs_var = (1 - self._obs_alpha) * self._obs_var + self._obs_alpha * np.square(flat_obs - 
self._obs_mean) 28 | 29 | def _apply_normalize_obs(self, obs): 30 | self._update_obs_estimate(obs) 31 | return (obs - self._obs_mean) / (np.sqrt(self._obs_var) + 1e-8) 32 | 33 | def reset(self): 34 | ret = self._wrapped_env.reset() 35 | if self._normalize_obs: 36 | return self._apply_normalize_obs(ret) 37 | else: 38 | return ret 39 | 40 | @property 41 | def action_space(self): 42 | if isinstance(self._wrapped_env.action_space, Box): 43 | ub = np.ones(self._wrapped_env.action_space.shape) 44 | return spaces.Box(-1 * ub, ub) 45 | return self._wrapped_env.action_space 46 | 47 | def step(self, action): 48 | if isinstance(self._wrapped_env.action_space, Box): 49 | # rescale the action 50 | lb, ub = self._wrapped_env.action_space.low, self._wrapped_env.action_space.high 51 | scaled_action = lb + (action + 1.) * 0.5 * (ub - lb) 52 | scaled_action = np.clip(scaled_action, lb, ub) 53 | else: 54 | scaled_action = action 55 | wrapped_step = self._wrapped_env.step(scaled_action) 56 | next_obs, reward, done, info = wrapped_step 57 | if self._normalize_obs: 58 | next_obs = self._apply_normalize_obs(next_obs) 59 | return next_obs, reward, done, info 60 | 61 | def __str__(self): 62 | return "Normalized: %s" % self._wrapped_env 63 | 64 | # def log_diagnostics(self, paths): 65 | # print "Obs mean:", self._obs_mean 66 | # print "Obs std:", np.sqrt(self._obs_var) 67 | # print "Reward mean:", self._reward_mean 68 | # print "Reward std:", np.sqrt(self._reward_var) 69 | 70 | normalize = NormalizedEnv 71 | -------------------------------------------------------------------------------- /gym_extensions/wrappers/observation_transform_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import Env, spaces 3 | 4 | from cached_property import cached_property 5 | 6 | from .proxy_env import ProxyEnv 7 | from .serializable import Serializable 8 | 9 | BIG = 1e6 10 | 11 | class ObservationTransformWrapper(ProxyEnv, Serializable): 12 | ''' Runs observation through a set of transforms and adjusts the observation space accordingly.''' 13 | 14 | def __init__(self, env, transformers): 15 | ''' 16 | :param env: the environment 17 | :param transformers: a list of transformers 18 | ''' 19 | Serializable.quick_init(self, locals()) 20 | self.transformers = transformers 21 | super(ObservationTransformWrapper, self).__init__(env) 22 | 23 | @cached_property 24 | def observation_space(self): 25 | obs_space = self._wrapped_env.observation_space 26 | print("Original observation space : {}".format(obs_space)) 27 | for transformer in self.transformers: 28 | obs_space = transformer.transformed_observation_space(obs_space) 29 | print("Transformed observation space : {}".format(obs_space)) 30 | return obs_space 31 | 32 | def reset(self): 33 | obs = self._wrapped_env.reset() 34 | for transformer in self.transformers: 35 | obs = transformer.transform(obs) 36 | transformer.reset() 37 | return obs 38 | 39 | def step(self, action): 40 | next_obs, reward, done, info = self._wrapped_env.step(action) 41 | for transformer in self.transformers: 42 | next_obs = transformer.transform(next_obs) 43 | return (next_obs, reward, done, info) 44 | -------------------------------------------------------------------------------- /gym_extensions/wrappers/proxy_env.py: -------------------------------------------------------------------------------- 1 | from gym import Env 2 | 3 | from .serializable import Serializable 4 | 5 | class ProxyEnv(Env, Serializable): 6 | """ 7 | From openai/rllab. 
We don't take a dependency on rllab itself because it is 8 | being deprecated and additionally it is difficult to install!! 9 | 10 | This wraps an OpenAI environment generically 11 | """ 12 | def __init__(self, wrapped_env): 13 | Serializable.quick_init(self, locals()) 14 | self._wrapped_env = wrapped_env 15 | 16 | @property 17 | def spec(self): 18 | return self._wrapped_env.spec 19 | 20 | @property 21 | def wrapped_env(self): 22 | return self._wrapped_env 23 | 24 | def reset(self, **kwargs): 25 | return self._wrapped_env.reset(**kwargs) 26 | 27 | @property 28 | def action_space(self): 29 | return self._wrapped_env.action_space 30 | 31 | @property 32 | def observation_space(self): 33 | return self._wrapped_env.observation_space 34 | 35 | def step(self, action): 36 | return self._wrapped_env.step(action) 37 | 38 | def render(self, *args, **kwargs): 39 | return self._wrapped_env.render(*args, **kwargs) 40 | 41 | def log_diagnostics(self, paths, *args, **kwargs): 42 | self._wrapped_env.log_diagnostics(paths, *args, **kwargs) 43 | 44 | @property 45 | def horizon(self): 46 | return self._wrapped_env.horizon 47 | 48 | def terminate(self): 49 | self._wrapped_env.terminate() 50 | 51 | def get_param_values(self): 52 | return self._wrapped_env.get_param_values() 53 | 54 | def set_param_values(self, params): 55 | self._wrapped_env.set_param_values(params) 56 | -------------------------------------------------------------------------------- /gym_extensions/wrappers/serializable.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import sys 3 | 4 | class Serializable(object): 5 | """ 6 | From openai/rllab we don't take a dependency on rllab itself because it is being 7 | deprecated and additionally it is difficult to install!! 8 | 9 | This is a simple class that makes serialization convenient. 
10 | """ 11 | def __init__(self, *args, **kwargs): 12 | self.__args = args 13 | self.__kwargs = kwargs 14 | 15 | def quick_init(self, locals_): 16 | if getattr(self, "_serializable_initialized", False): 17 | return 18 | if sys.version_info >= (3, 0): 19 | spec = inspect.getfullargspec(self.__init__) 20 | # Exclude the first "self" parameter 21 | if spec.varkw: 22 | kwargs = locals_[spec.varkw] 23 | else: 24 | kwargs = dict() 25 | else: 26 | spec = inspect.getargspec(self.__init__) 27 | if spec.keywords: 28 | kwargs = locals_[spec.keywords] 29 | else: 30 | kwargs = dict() 31 | if spec.varargs: 32 | varargs = locals_[spec.varargs] 33 | else: 34 | varargs = tuple() 35 | in_order_args = [locals_[arg] for arg in spec.args][1:] 36 | self.__args = tuple(in_order_args) + varargs 37 | self.__kwargs = kwargs 38 | setattr(self, "_serializable_initialized", True) 39 | 40 | def __getstate__(self): 41 | return {"__args": self.__args, "__kwargs": self.__kwargs} 42 | 43 | def __setstate__(self, d): 44 | # convert all __args to keyword-based arguments 45 | if sys.version_info >= (3, 0): 46 | spec = inspect.getfullargspec(self.__init__) 47 | else: 48 | spec = inspect.getargspec(self.__init__) 49 | in_order_args = spec.args[1:] 50 | out = type(self)(**dict(zip(in_order_args, d["__args"]), **d["__kwargs"])) 51 | self.__dict__.update(out.__dict__) 52 | 53 | @classmethod 54 | def clone(cls, obj, **kwargs): 55 | assert isinstance(obj, Serializable) 56 | d = obj.__getstate__() 57 | d["__kwargs"] = dict(d["__kwargs"], **kwargs) 58 | out = type(obj).__new__(type(obj)) 59 | out.__setstate__(d) 60 | return out 61 | -------------------------------------------------------------------------------- /gym_extensions/wrappers/transformers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.misc 3 | from gym.spaces.box import Box 4 | from scipy.misc import imresize 5 | 6 | from cached_property import cached_property 7 | 8 | 9 | # TODO: move this to folder with different files 10 | 11 | class BaseTransformer(object): 12 | """ 13 | Base transformer interface, inherited objects should conform to this 14 | """ 15 | 16 | def transform(self, observation): 17 | """ 18 | observation to transform 19 | """ 20 | raise NotImplementedError 21 | 22 | def transformed_observation_space(self, prev_observation_space): 23 | """ 24 | prev_observation_space and how it is modified 25 | """ 26 | return prev_observation_space 27 | 28 | def reset(self): 29 | """ 30 | resets the transformer if there is an operation to be made 31 | """ 32 | return 33 | 34 | class AppendPrevTimeStepTransformer(BaseTransformer): 35 | """ 36 | Keeps track of and appends the previous observation timestep. 37 | """ 38 | def __init__(self): 39 | self.prev_timestep = None 40 | 41 | def transform(self, observation): 42 | if self.prev_timestep is None: 43 | self.prev_timestep = np.zeros(observation.shape) 44 | 45 | new_obs = np.concatenate([observation.reshape((1, -1)), self.prev_timestep.reshape((1, -1))], axis=1).reshape(-1) 46 | 47 | self.prev_timestep = observation 48 | return new_obs 49 | 50 | def transformed_observation_space(self, prev_observation_space): 51 | if type(prev_observation_space) is Box: 52 | #TODO: should use tile? 
53 | copy = np.copy(prev_observation_space.low.reshape((1, -1))) 54 | low = np.concatenate([copy, copy], axis=1) 55 | copy = np.copy(prev_observation_space.high.reshape((1, -1))) 56 | high = np.concatenate([copy, copy], axis=1) 57 | return Box(low.reshape(-1), high.reshape(-1)) 58 | else: 59 | raise NotImplementedError("Currently only Box observation spaces are supported by AppendPrevTimeStepTransformer") 60 | 61 | 62 | 63 | def reset(self): 64 | self.prev_timestep = None 65 | 66 | class SimpleNormalizePixelIntensitiesTransformer(BaseTransformer): 67 | """ 68 | Normalizes pixel intensities simply by dividing by 255. 69 | """ 70 | def transform(self, observation): 71 | return np.array(observation).astype(np.float32) / 255.0 72 | 73 | def transformed_observation_space(self, wrapped_observation_space): 74 | return wrapped_observation_space 75 | 76 | class ResizeImageTransformer(BaseTransformer): 77 | """ 78 | Rescale a given image to a fraction of its current size 79 | """ 80 | 81 | def __init__(self, fraction_of_current_size): 82 | self.fraction_of_current_size = fraction_of_current_size 83 | 84 | def transform(self, observation): 85 | return scipy.misc.imresize(observation, self.fraction_of_current_size) 86 | 87 | def transformed_observation_space(self, wrapped_observation_space): 88 | if type(wrapped_observation_space) is Box: 89 | return Box(scipy.misc.imresize(wrapped_observation_space.low, self.fraction_of_current_size), scipy.misc.imresize(wrapped_observation_space.high, self.fraction_of_current_size)) 90 | else: 91 | raise NotImplementedError("Currently only Box observation spaces are supported by ResizeImageTransformer") 92 | 93 | class RandomSensorMaskTransformer(BaseTransformer): 94 | """ 95 | Randomly occlude a given percentage of sensors on every observation.
96 | Randomly occludes different ones every time 97 | """ 98 | 99 | def __init__(self, env, percent_of_sensors_to_occlude=.15): 100 | """ 101 | Knock out random sensors 102 | """ 103 | self.percent_of_sensors_to_occlude = percent_of_sensors_to_occlude 104 | self.obs_dims = env.observation_space.flat_dim 105 | 106 | def occlude(self, obs): 107 | sensor_idx = np.random.randint(0, self.obs_dims-1, size=int(self.obs_dims * self.percent_of_sensors_to_occlude)) 108 | obs[sensor_idx] = 0 109 | return obs 110 | 111 | def transform(self, observation): 112 | return self.occlude(observation) 113 | -------------------------------------------------------------------------------- /multitask_benchmarks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Breakend/gym-extensions/b260de72d7e4015727add3fee5d40a43f390a283/multitask_benchmarks/__init__.py -------------------------------------------------------------------------------- /multitask_benchmarks/commands_run: -------------------------------------------------------------------------------- 1 | python multitask_benchmarks/run_trpo.py HopperGravityHalf-v0 HopperGravityThreeQuarters-v0 Hopper-v1 HopperGravityOneAndQuarter-v0 HopperGravityOneAndHalf-v0 --num_epochs 500 --batch_size 25000 --text_log_file ./data/hopper_gravity.log --tabular_log_file ./data/hopper_gravity_progress.csv &> debug_log_hopper.log & 2 | 3 | python multitask_benchmarks/run_trpo.py HalfCheetahGravityHalf-v0 HalfCheetahGravityThreeQuarters-v0 HalfCheetah-v1 HalfCheetahGravityOneAndQuarter-v0 HalfCheetahGravityOneAndHalf-v0 --num_epochs 500 --batch_size 25000 --text_log_file ./data/hc_gravity.log --tabular_log_file ./data/hc_gravity_progress.csv &> debug_log_hc.log & 4 | 5 | nohup python multitask_benchmarks/run_trpo.py Walker2dWithSensor-v0 Walker2dWall-v0 --num_epochs 500 --batch_size 25000 --text_log_file ./walker2d_transfer.log --tabular_log_file ./walker2d_transfer.csv & 6 | 7 | nohup python multitask_benchmarks/run_trpo.py HopperWithSensor-v0 HopperWall-v0 --num_epochs 500 --batch_size 25000 --text_log_file ./hopper_transfer.log --tabular_log_file ./hopper_transfer.csv & 8 | -------------------------------------------------------------------------------- /multitask_benchmarks/run_trpo.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.normalized_env import normalize 2 | from rllab.misc.instrument import stub, run_experiment_lite 3 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 4 | from rllab.envs.gym_env import GymEnv 5 | 6 | from sandbox.rocky.tf.envs.base import TfEnv 7 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy 8 | from sandbox.rocky.tf.algos.trpo import TRPO 9 | from rllab.misc import ext 10 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 11 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import FiniteDifferenceHvp 12 | import rllab.misc.logger as logger 13 | import gym_extensions 14 | import pickle 15 | import os.path as osp 16 | import numpy as np 17 | from multitask_benchmarks.utils import * 18 | 19 | import tensorflow as tf 20 | 21 | import argparse 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument("envs", nargs='+', help="The list of environments to train on in order. 
Eval rollouts will be run on all environments at the end.") 24 | parser.add_argument("--num_epochs", default=100, type=int, help="Number of epochs to run.") 25 | parser.add_argument("--num_final_rollouts", default=20, type=int, help="Number of rollouts to run on final evaluation of environments.") 26 | parser.add_argument("--batch_size", default=25000, type=int, help="Batch_size per epoch (this is the number of (state, action) samples, not the number of rollouts)") 27 | parser.add_argument("--step_size", default=0.01, type=float, help="Step size for TRPO (i.e. the maximum KL bound)") 28 | parser.add_argument("--reg_coeff", default=1e-5, type=float, help="Regularization coefficient for TRPO") 29 | parser.add_argument("--text_log_file", default="./data/debug.log", help="Where text output will go") 30 | parser.add_argument("--tabular_log_file", default="./data/progress.csv", help="Where tabular output will go") 31 | args = parser.parse_args() 32 | 33 | # stub(globals()) 34 | 35 | # ext.set_seed(1) 36 | logger.add_text_output(args.text_log_file) 37 | logger.add_tabular_output(args.tabular_log_file) 38 | logger.set_log_tabular_only(False) 39 | 40 | envs = [] 41 | 42 | for env_name in args.envs: 43 | gymenv = GymEnv(env_name, force_reset=True, record_video=False, record_log=False) 44 | env = TfEnv(normalize(gymenv)) 45 | envs.append((env_name, env)) 46 | 47 | policy = GaussianMLPPolicy( 48 | name="policy", 49 | env_spec=env.spec, 50 | # The neural network policy should have two hidden layers, each with 32 hidden units. 51 | hidden_sizes=(100, 50, 25), 52 | hidden_nonlinearity=tf.nn.relu, 53 | ) 54 | 55 | baseline = LinearFeatureBaseline(env_spec=env.spec) 56 | 57 | 58 | with tf.Session() as sess: 59 | for env_name, env in envs: 60 | 61 | logger.log("Training Policy on %s" % env_name) 62 | 63 | algo = TRPO( 64 | env=env, 65 | policy=policy, 66 | baseline=baseline, 67 | batch_size=args.batch_size, 68 | max_path_length=env.horizon, 69 | n_itr=args.num_epochs, 70 | discount=0.99, 71 | step_size=args.step_size, 72 | optimizer=ConjugateGradientOptimizer(reg_coeff=args.reg_coeff, hvp_approach=FiniteDifferenceHvp(base_eps=args.reg_coeff)) 73 | ) 74 | 75 | custom_train(algo, sess=sess) 76 | 77 | rollouts = algo.obtain_samples(args.num_epochs + 1) 78 | 79 | logger.log("Average reward for training rollouts on (%s): %f +- %f " % (env_name, np.mean([np.sum(p['rewards']) for p in rollouts]), np.std([np.sum(p['rewards']) for p in rollouts]))) 80 | 81 | # Final evaluation on all environments using the learned policy 82 | 83 | total_rollouts = [] 84 | for env_name, env in envs: 85 | rollouts = [] 86 | for i in range(args.num_final_rollouts): 87 | rollout = rollout_policy(policy, env, max_path_length=env.horizon, speedup=1, get_image_observations=True, animated=True) 88 | rollouts.append(rollout) 89 | total_rollouts.append(rollout) 90 | 91 | logger.log("Average reward for eval rollouts on (%s): %f +- %f " % (env_name, np.mean([np.sum(p['rewards']) for p in rollouts]), np.std([np.sum(p['rewards']) for p in rollouts]))) 92 | 93 | logger.log("Total Average across all rollouts and envs: %f +- %f " % (np.mean([np.sum(p['rewards']) for p in total_rollouts]), np.std([np.sum(p['rewards']) for p in total_rollouts]))) 94 | -------------------------------------------------------------------------------- /multitask_benchmarks/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import time 3 | import rllab.misc.logger as logger 4 | import pickle 5 | from 
rllab.misc import tensor_utils 6 | import numpy as np 7 | from tqdm import tqdm 8 | import copy 9 | 10 | TINY = 1.0e-8 11 | 12 | def initialize_uninitialized(sess): 13 | global_vars = tf.global_variables() 14 | is_not_initialized = sess.run([tf.is_variable_initialized(var) for var in global_vars]) 15 | not_initialized_vars = [v for (v, f) in zip(global_vars, is_not_initialized) if not f] 16 | 17 | print([str(i.name) for i in not_initialized_vars]) # only for testing 18 | if len(not_initialized_vars): 19 | sess.run(tf.variables_initializer(not_initialized_vars)) 20 | 21 | def custom_train(algo, sess=None): 22 | """ 23 | This is necessary so that we don't wipe away already initialized policy params. 24 | Ideally, we should pull request this in as an option to RLlab and remove it from here once done 25 | """ 26 | created_session = True if (sess is None) else False 27 | if sess is None: 28 | sess = tf.Session() 29 | sess.__enter__() 30 | 31 | rollout_cache = [] 32 | initialize_uninitialized(sess) 33 | algo.start_worker() 34 | start_time = time.time() 35 | for itr in range(algo.start_itr, algo.n_itr): 36 | itr_start_time = time.time() 37 | with logger.prefix('itr #%d | ' % itr): 38 | logger.log("Obtaining samples...") 39 | paths = algo.obtain_samples(itr) 40 | logger.log("Processing samples...") 41 | samples_data = algo.process_samples(itr, paths) 42 | logger.log("Logging diagnostics...") 43 | algo.log_diagnostics(paths) 44 | logger.log("Optimizing policy...") 45 | algo.optimize_policy(itr, samples_data) 46 | logger.log("Saving snapshot...") 47 | params = algo.get_itr_snapshot(itr, samples_data) # , **kwargs) 48 | if algo.store_paths: 49 | params["paths"] = samples_data["paths"] 50 | logger.save_itr_params(itr, params) 51 | logger.log("Saved") 52 | logger.record_tabular('Time', time.time() - start_time) 53 | logger.record_tabular('ItrTime', time.time() - itr_start_time) 54 | logger.dump_tabular(with_prefix=False) 55 | 56 | algo.shutdown_worker() 57 | if created_session: 58 | sess.close() 59 | 60 | 61 | def rollout_policy(agent, env, max_path_length=200, speedup=1, get_image_observations=False, animated=False): 62 | """ 63 | Mostly taken from https://github.com/bstadie/third_person_im/blob/master/sandbox/bradly/third_person/algos/cyberpunk_trainer.py#L164 64 | Generate a rollout for a given policy 65 | """ 66 | observations = [] 67 | im_observations = [] 68 | actions = [] 69 | rewards = [] 70 | agent_infos = [] 71 | env_infos = [] 72 | o = env.reset() 73 | path_length = 0 74 | 75 | while path_length <= max_path_length: 76 | a, agent_info = agent.get_action(o) 77 | next_o, r, d, env_info = env.step(a) 78 | observations.append(env.observation_space.flatten(o)) 79 | 80 | actions.append(env.action_space.flatten(a)) 81 | agent_infos.append(agent_info) 82 | env_infos.append(env_info) 83 | path_length += 1 84 | o = next_o 85 | if get_image_observations: 86 | if not animated: 87 | pixel_array = env.render(mode="rgb_array") 88 | else: 89 | pixel_array = env.render() 90 | 91 | if pixel_array is None and not animated: 92 | # Not convinced that behaviour works for all environments, so until 93 | # such a time as I'm convinced of this, drop into a debug shell 94 | print("Problem! Couldn't get pixels! 
Dropping into debug shell.") 95 | import pdb; pdb.set_trace() 96 | im_observations.append(pixel_array) 97 | if d: 98 | rewards.append(r) 99 | break 100 | else: 101 | rewards.append(r) 102 | 103 | if animated: 104 | env.render(close=True) 105 | 106 | im_observations = tensor_utils.stack_tensor_list(im_observations) 107 | observations = tensor_utils.stack_tensor_list(observations) 108 | rewards = tensor_utils.stack_tensor_list(rewards) 109 | 110 | return dict( 111 | observations=observations, 112 | im_observations=im_observations, 113 | actions=tensor_utils.stack_tensor_list(actions), 114 | rewards=rewards, 115 | agent_infos=tensor_utils.stack_tensor_dict_list(agent_infos), 116 | env_infos=tensor_utils.stack_tensor_dict_list(env_infos), 117 | ) 118 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.11 2 | gym[all]>=0.9.2 3 | pyrr 4 | six 5 | cached_property 6 | networkx 7 | scipy>=0.17.0 8 | nose 9 | pygame 10 | scikit-image 11 | matplotlib>=1.3.1 12 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import sys 3 | from os.path import dirname, realpath 4 | 5 | def read_requirements_file(filename): 6 | req_file_path = '%s/%s' % (dirname(realpath(__file__)), filename) 7 | with open(req_file_path) as f: 8 | return [line.strip() for line in f] 9 | 10 | # the package targets Python 3.5+ (see python_requires below) 11 | if sys.version_info < (3, 5): 12 | print('Sorry, Python < 3.5 is not supported, please install Python 3.5 or newer') 13 | sys.exit(1) 14 | 15 | setup(name='gym_extensions', 16 | version='0.0.1', 17 | packages=find_packages(), 18 | python_requires='>=3.5', 19 | install_requires=read_requirements_file('requirements.txt'), 20 | # mujoco just released 1.5, for now stick with 1.3 21 | #extras_require={'mujoco':'mujoco-py<1.50.2,>=1.50.1'}, 22 | extras_require={'mujoco':'mujoco-py<1.50,>=0.5'}, 23 | description='Extensions to OpenAI Gym for multitask learning, inverse reinforcement learning, and other peripheral tasks.', 24 | #author='Peter Henderson', 25 | url='https://github.com/Breakend/gym-extensions', 26 | ) 27 | --------------------------------------------------------------------------------
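Finally, a minimal smoke-test sketch for a source checkout, assuming `pip install -e .` (optionally with the `[mujoco]` extra) has already been run from the repository root. The environment id is looked up from the package's own `custom_envs` dictionary rather than hard-coded, since the exact ids depend on what each sub-package registers; everything else mirrors what `test_import.py` does.

```python
# Assumes the repo has been installed with: pip install -e .   (add [mujoco] for the MuJoCo envs)
import gym
from gym_extensions.continuous import gym_navigation_2d  # importing the module registers the navigation envs

# Pick the first registered 2D-navigation id and run a short random rollout
env_id = sorted(gym_navigation_2d.custom_envs.keys())[0]
env = gym.make(env_id)

obs = env.reset()
for _ in range(5):
    obs, reward, done, info = env.step(env.action_space.sample())
    print(env_id, reward, done)
    if done:
        break
```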