├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── config ├── __init__.py ├── motion_planner.py ├── pusher.py └── sawyer.py ├── docs ├── content.md ├── img │ ├── 2D_push.png │ ├── clvrbanner.png │ ├── favicon-32x32.png │ ├── method.png │ ├── result.png │ ├── sawyer_assembly.png │ ├── sawyer_lift.png │ ├── sawyer_push.png │ └── teaser.png ├── index.html ├── js │ └── figure-extension.js ├── theme.css └── video │ ├── push_baseline.mp4 │ ├── sawyer_assembly_baseline.mp4 │ ├── sawyer_assembly_baseline_lg.mp4 │ ├── sawyer_assembly_mopa.mp4 │ ├── sawyer_lift_baseline.mp4 │ ├── sawyer_lift_baseline_lg.mp4 │ ├── sawyer_lift_mopa.gif │ ├── sawyer_lift_mopa.mp4 │ ├── sawyer_push_baseline.mp4 │ ├── sawyer_push_baseline_lg.mp4 │ ├── sawyer_push_mopa.gif │ ├── sawyer_push_mopa.mp4 │ └── teaser.gif ├── env ├── __init__.py ├── assets │ ├── meshes │ │ ├── sawyer │ │ │ ├── base.stl │ │ │ ├── head.stl │ │ │ ├── l0.stl │ │ │ ├── l1.stl │ │ │ ├── l2.stl │ │ │ ├── l3.stl │ │ │ ├── l4.stl │ │ │ ├── l5.stl │ │ │ ├── l6.stl │ │ │ └── pedestal.stl │ │ ├── toy_table │ │ │ ├── 0.stl │ │ │ ├── 1.stl │ │ │ ├── 2.stl │ │ │ ├── 3.stl │ │ │ └── 4.stl │ │ └── two_finger_gripper │ │ │ ├── electric_gripper_base.STL │ │ │ ├── half_round_tip.STL │ │ │ └── standard_narrow.STL │ ├── objects │ │ ├── can-visual.xml │ │ ├── can.xml │ │ └── meshes │ │ │ └── can.stl │ ├── textures │ │ ├── can.png │ │ ├── dark-wood.png │ │ ├── grid.png │ │ └── light-wood.png │ └── xml │ │ ├── common │ │ ├── basic_scene.xml │ │ ├── camera.xml │ │ ├── gripper_assembly_chain.xml │ │ ├── gripper_chain.xml │ │ ├── gripper_indicator_chain.xml │ │ ├── gripper_pick_chain.xml │ │ ├── gripper_pick_indicator_chain.xml │ │ ├── gripper_pick_pos_act.xml │ │ ├── gripper_pick_target_chain.xml │ │ ├── gripper_pos_act.xml │ │ ├── gripper_target_chain.xml │ │ ├── materials.xml │ │ ├── pusher.xml │ │ ├── pusher_gripper.xml │ │ ├── sawyer.xml │ │ ├── sawyer_assembly.xml │ │ ├── sawyer_assembly_chain.xml │ │ ├── sawyer_chain.xml │ │ ├── 
sawyer_dependencies.xml │ │ ├── sawyer_indicator_chain.xml │ │ ├── sawyer_joint_pos_act.xml │ │ ├── sawyer_no_gripper.xml │ │ ├── sawyer_no_gripper_chain.xml │ │ ├── sawyer_no_gripper_indicator_chain.xml │ │ ├── sawyer_no_gripper_target_chain.xml │ │ ├── sawyer_pick.xml │ │ ├── sawyer_pick_chain.xml │ │ ├── sawyer_pick_indicator_chain.xml │ │ ├── sawyer_pick_target_chain.xml │ │ ├── sawyer_target_chain.xml │ │ ├── skybox.xml │ │ ├── target.xml │ │ └── visual.xml │ │ ├── pusher_obstacle.xml │ │ ├── sawyer.xml │ │ ├── sawyer_assembly.xml │ │ ├── sawyer_assembly_obstacle.xml │ │ ├── sawyer_lift.xml │ │ ├── sawyer_lift_obstacle.xml │ │ ├── sawyer_push.xml │ │ └── sawyer_push_obstacle.xml ├── base.py ├── inverse_kinematics.py ├── pusher │ ├── __init__.py │ └── pusher_obstacle.py └── sawyer │ ├── __init__.py │ ├── sawyer.py │ ├── sawyer_assembly.py │ ├── sawyer_assembly_obstacle.py │ ├── sawyer_lift.py │ ├── sawyer_lift_obstacle.py │ ├── sawyer_push.py │ └── sawyer_push_obstacle.py ├── motion_planners ├── 3rd_party │ └── include │ │ └── cxxopts.hpp ├── KinematicPlanner.cpp ├── __init__.py ├── include │ ├── KinematicPlanner.h │ ├── c_planner.pxd.bak │ ├── compound_state_projector.h │ ├── mujoco_ompl_interface.h │ ├── mujoco_wrapper.h │ └── plan.pxd.bak ├── planner.cpp ├── planner.pyx ├── sampling_based_planner.py ├── setup.py ├── setup_macos.py └── src │ ├── compound_state_projector.cpp │ ├── mujoco_ompl_interface.cpp │ └── mujoco_wrapper.cpp ├── requirements.txt ├── rl ├── __init__.py ├── base_agent.py ├── dataset.py ├── main.py ├── mopa_rollouts.py ├── planner_agent.py ├── policies │ ├── __init__.py │ ├── actor_critic.py │ ├── distributions.py │ ├── mlp_actor_critic.py │ └── utils.py ├── rollouts.py ├── sac_agent.py ├── td3_agent.py └── trainer.py ├── scripts ├── 2d │ ├── baseline.sh │ ├── baseline_ik.sh │ ├── baseline_lg.sh │ ├── mopa.sh │ ├── mopa_discrete.sh │ └── mopa_ik.sh ├── 3d │ ├── assembly │ │ ├── baseline.sh │ │ ├── baseline_ik.sh │ │ ├── baseline_lg.sh │ │ 
├── mopa.sh │ │ ├── mopa_discrete.sh │ │ └── mopa_ik.sh │ ├── lift │ │ ├── baseline.sh │ │ ├── baseline_ik.sh │ │ ├── baseline_lg.sh │ │ ├── mopa.sh │ │ ├── mopa_discrete.sh │ │ └── mopa_ik.sh │ └── push │ │ ├── baseline.sh │ │ ├── baseline_ik.sh │ │ ├── baseline_lg.sh │ │ ├── mopa.sh │ │ ├── mopa_discrete.sh │ │ └── mopa_ik.sh └── misc │ ├── evaluate_safety.sh │ └── installEigen.sh └── util ├── __init__.py ├── contact_info.py ├── env.py ├── gym.py ├── info.py ├── logger.py ├── misc.py ├── mpi.py ├── pytorch.py ├── sawyer_env.py └── transform_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Mac OS 2 | .DS_Store 3 | *~ 4 | .python-version 5 | 6 | # Log 7 | log 8 | logs 9 | 10 | # Vim 11 | .*.s[a-w][a-z] 12 | Session.vim 13 | 14 | # Data 15 | *.csv 16 | *.ini 17 | *.npy 18 | *.zip 19 | *screenlog* 20 | 21 | # Mujoco 22 | MUJOCO_LOG.TXT 23 | 24 | # Python 25 | ## Byte-compiled / optimized / DLL files 26 | __pycache__/ 27 | *.py[cod] 28 | *$py.class 29 | 30 | ## C extensions 31 | #*.so 32 | 33 | ## Distribution / packaging 34 | .Python 35 | build/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | 40 | ## Jupyter Notebook 41 | .ipynb_checkpoints 42 | **/*.ipynb 43 | 44 | ## virtualenv 45 | .venv 46 | venv/ 47 | 48 | ## Rope project settings 49 | .ropeproject 50 | 51 | ## Wandb 52 | wandb/ 53 | 54 | ## PyCharm 55 | .idea/ 56 | 57 | ## VSCode 58 | .vscode/ 59 | 60 | ## flake8 61 | .flake8 62 | z.vid 63 | z.log.global 64 | test_logs 65 | tmp 66 | 67 | ## Eigen 68 | eigen-3.3.7/ 69 | 70 | motion_planners/*.so 71 | ./result 72 | ./plot 73 | # env/assets/xml/sawyer* 74 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Cognitive Learning for Vision and Robotics (CLVR) lab @ USC 4 | 5 | Permission is hereby granted, free of charge, to any person 
obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /config/motion_planner.py: -------------------------------------------------------------------------------- 1 | from util import str2bool 2 | 3 | 4 | def add_arguments(parser): 5 | """ 6 | Adds a list of arguments to argparser for the reacher environment. 
7 | """ 8 | # reacher 9 | parser.add_argument( 10 | "--planner_type", 11 | type=str, 12 | default="rrt_connect", 13 | choices=[ 14 | "rrt", 15 | "rrt_connect", 16 | ], 17 | help="planner type", 18 | ) 19 | parser.add_argument( 20 | "--simple_planner_type", 21 | type=str, 22 | default="rrt_connect", 23 | choices=[ 24 | "rrt", 25 | "rrt_connect", 26 | ], 27 | help="planner type for simple planner", 28 | ) 29 | parser.add_argument( 30 | "--planner_objective", 31 | type=str, 32 | default="path_length", 33 | choices=[ 34 | "maximize_min_clearance", 35 | "path_length", 36 | "state_cost_integral", 37 | "constraint", 38 | ], 39 | help="planner objective function", 40 | ) 41 | parser.add_argument( 42 | "--threshold", 43 | type=float, 44 | default=0.0, 45 | help="threshold for optimization objective", 46 | ) 47 | parser.add_argument( 48 | "--is_simplified", 49 | type=str2bool, 50 | default=False, 51 | help="enable simplification of planned trajectory", 52 | ) 53 | parser.add_argument( 54 | "--simplified_duration", 55 | type=float, 56 | default=0.01, 57 | help="duration of simplification of planned trajectory", 58 | ) 59 | parser.add_argument( 60 | "--simple_planner_simplified", 61 | type=str2bool, 62 | default=False, 63 | help="enable simplification of planned trajectory for simple planner", 64 | ) 65 | parser.add_argument( 66 | "--simple_planner_simplified_duration", 67 | type=float, 68 | default=0.01, 69 | help="duration of simplification of planned trajectory for simple planner", 70 | ) 71 | 72 | 73 | def get_default_config(): 74 | """ 75 | Gets default configurations for the reacher environment. 
76 | """ 77 | import argparse 78 | 79 | parser = argparse.ArgumentParser("Default Configuration for Motion Planner") 80 | add_argument(parser) 81 | 82 | config = parser.parse_args([]) 83 | return config 84 | -------------------------------------------------------------------------------- /config/pusher.py: -------------------------------------------------------------------------------- 1 | from util import str2bool 2 | 3 | 4 | def add_arguments(parser): 5 | """ 6 | Adds a list of arguments to argparser for the pusher environment. 7 | """ 8 | # pusher 9 | parser.add_argument( 10 | "--reward_type", 11 | type=str, 12 | default="dense", 13 | choices=["sparse"], 14 | help="reward type", 15 | ) 16 | parser.add_argument( 17 | "--distance_threshold", 18 | type=float, 19 | default=0.05, 20 | help="distance threshold for termination", 21 | ) 22 | parser.add_argument( 23 | "--max_episode_steps", 24 | type=int, 25 | default=150, 26 | help="maximum timesteps in an episode", 27 | ) 28 | 29 | # observations 30 | parser.add_argument( 31 | "--screen_width", type=int, default=500, help="width of camera image" 32 | ) 33 | parser.add_argument( 34 | "--screen_height", type=int, default=500, help="height of camera image" 35 | ) 36 | parser.add_argument( 37 | "--frame_skip", type=int, default=1, help="Numer of skip frames" 38 | ) 39 | parser.add_argument( 40 | "--kp", type=float, default=150.0, help="p term for a PID controller" 41 | ) 42 | parser.add_argument( 43 | "--kd", type=float, default=20.0, help="d term for a PID controller" 44 | ) 45 | parser.add_argument( 46 | "--ki", type=float, default=0.1, help="i term for a PID controller" 47 | ) 48 | parser.add_argument( 49 | "--frame_dt", type=float, default=1.0, help="delta t between each frame" 50 | ) 51 | parser.add_argument( 52 | "--ctrl_reward_coef", type=float, default=0, help="control reward coefficient" 53 | ) 54 | parser.add_argument( 55 | "--success_reward", type=float, default=150.0, help="completion reward" 56 | ) 57 | 
parser.add_argument( 58 | "--camera_name", type=str, default="cam0", help="camera name in an environment" 59 | ) 60 | parser.add_argument( 61 | "--range", type=float, default=0.2, help="range of motion planner" 62 | ) 63 | parser.add_argument( 64 | "--simple_planner_range", 65 | type=float, 66 | default=0.1, 67 | help="range of simple motion planner", 68 | ) 69 | parser.add_argument( 70 | "--timelimit", type=float, default=1.0, help="timelimit for planning" 71 | ) 72 | parser.add_argument( 73 | "--simple_planner_timelimit", 74 | type=float, 75 | default=0.02, 76 | help="timelimit for planning by simple motion planner", 77 | ) 78 | parser.add_argument( 79 | "--contact_threshold", 80 | type=float, 81 | default=-0.0015, 82 | help="depth threshold for contact", 83 | ) 84 | parser.add_argument( 85 | "--joint_margin", type=float, default=0.0, help="margin of each joint" 86 | ) 87 | parser.add_argument( 88 | "--step_size", 89 | type=float, 90 | default=0.04, 91 | help="step size for invalid target handling", 92 | ) 93 | 94 | 95 | def get_default_config(): 96 | """ 97 | Gets default configurations for the pusher environment. 98 | """ 99 | import argparse 100 | 101 | parser = argparse.ArgumentParser("Default Configuration for 2D Pusher Environment") 102 | add_argument(parser) 103 | 104 | parser.add_argument("--seed", type=int, default=1234, help="random seed") 105 | parser.add_argument("--debug", type=str2bool, default=False, help="enable debugging") 106 | 107 | config = parser.parse_args([]) 108 | return config 109 | -------------------------------------------------------------------------------- /config/sawyer.py: -------------------------------------------------------------------------------- 1 | from util import str2bool 2 | 3 | 4 | def add_arguments(parser): 5 | """ 6 | Adds a list of arguments to argparser for the sawyer environment. 
7 | """ 8 | # sawyer 9 | parser.add_argument( 10 | "--reward_type", 11 | type=str, 12 | default="dense", 13 | choices=["dense", "sparse"], 14 | help="reward type", 15 | ) 16 | parser.add_argument( 17 | "--distance_threshold", 18 | type=float, 19 | default=0.06, 20 | help="distance threshold for termination", 21 | ) 22 | parser.add_argument( 23 | "--max_episode_steps", 24 | type=int, 25 | default=250, 26 | help="maximum timesteps in an episode", 27 | ) 28 | parser.add_argument( 29 | "--screen_width", type=int, default=500, help="width of camera image" 30 | ) 31 | parser.add_argument( 32 | "--screen_height", type=int, default=500, help="height of camera image" 33 | ) 34 | parser.add_argument( 35 | "--camera_name", 36 | type=str, 37 | default="visview", 38 | help="camera name in an environment", 39 | ) 40 | 41 | # observations 42 | parser.add_argument( 43 | "--frame_skip", type=int, default=1, help="Numer of skip frames" 44 | ) 45 | parser.add_argument( 46 | "--action_repeat", type=int, default=5, help="number of action repeats" 47 | ) 48 | parser.add_argument( 49 | "--ctrl_reward_coef", type=float, default=0, help="control reward coefficient" 50 | ) 51 | 52 | parser.add_argument( 53 | "--kp", type=float, default=40.0, help="p term for a PID controller" 54 | ) # 150.) 55 | parser.add_argument( 56 | "--kd", type=float, default=8.0, help="d term for a PID controller" 57 | ) # 20.) 
58 | parser.add_argument( 59 | "--ki", type=float, default=0.0, help="i term for a PID controller" 60 | ) 61 | parser.add_argument( 62 | "--frame_dt", type=float, default=0.15, help="delta t between each frame" 63 | ) # 0.1) 64 | parser.add_argument( 65 | "--use_robot_indicator", 66 | type=str2bool, 67 | default=True, 68 | help="enable visualization of robot indicator for motion planner", 69 | ) 70 | parser.add_argument( 71 | "--use_target_robot_indicator", 72 | type=str2bool, 73 | default=True, 74 | help="enable visualization of robot indicator for target position of motion planner", 75 | ) 76 | parser.add_argument( 77 | "--success_reward", type=float, default=150.0, help="completion reward" 78 | ) 79 | parser.add_argument( 80 | "--range", type=float, default=0.1, help="range of motion planner" 81 | ) 82 | parser.add_argument( 83 | "--simple_planner_range", 84 | type=float, 85 | default=0.05, 86 | help="range of simple motion planner", 87 | ) 88 | parser.add_argument( 89 | "--timelimit", type=float, default=1.0, help="timelimit for motion planner" 90 | ) 91 | parser.add_argument( 92 | "--simple_planner_timelimit", 93 | type=float, 94 | default=0.05, 95 | help="timelimit for simple motion planner", 96 | ) 97 | parser.add_argument( 98 | "--contact_threshold", 99 | type=float, 100 | default=-0.002, 101 | help="depth thredhold for contact", 102 | ) 103 | parser.add_argument( 104 | "--joint_margin", type=float, default=0.001, help="marin of each joint" 105 | ) 106 | parser.add_argument( 107 | "--step_size", 108 | type=float, 109 | default=0.02, 110 | help="step size for invalid target handling", 111 | ) 112 | 113 | 114 | def get_default_config(): 115 | """ 116 | Gets default configurations for the Sawyer environment. 
117 | """ 118 | import argparse 119 | 120 | parser = argparse.ArgumentParser("Default Configuration for Sawyer Environment") 121 | add_argument(parser) 122 | 123 | parser.add_argument("--seed", type=int, default=1234, help="random seed") 124 | parser.add_argument("--debug", type=str2bool, default=False, help="enable debugging") 125 | 126 | config = parser.parse_args([]) 127 | return config 128 | -------------------------------------------------------------------------------- /docs/content.md: -------------------------------------------------------------------------------- 1 | 2 | ---- 3 | 4 | ## Overview 5 | 6 | ![MoPA-RL Framework](./img/method.png "") 7 | 8 | To solve tasks in obstructed environments, we propose motion planner augmented reinforcement learning (MoPA-RL). Our framework consists of an RL policy and a motion planner. The motion planner is integrated into the RL policy by enlarging the action space. If a sampled action from the RL policy is in the original action space, an agent directly executes the action to the environment, otherwise the motion planner computes a path to move the agent to faraway points. MoPA-RL has three benefits: 9 | 14 | 15 | ---- 16 | 17 | ## Videos 18 | 19 | Sawyer Push 20 |

Sawyer arm is required to find a path to reach an object inside of a box, and push it to a goal position.


21 |
22 |
23 | 26 |
SAC
27 |
28 |
29 | 32 |
SAC Large
33 |
34 |
35 | 38 |
MoPA-SAC (Ours)
39 |
40 |
41 | Sawyer Lift 42 |

Sawyer arm needs to find a path to get inside a box, grasp a can and take it out from the box.


43 |
44 | 45 | 46 |
47 | 50 |
SAC
51 |
52 |
53 | 56 |
SAC Large
57 |
58 |
59 | 62 |
MoPA-SAC (Ours)
63 |
64 | 65 | 66 |
67 | Sawyer Assembly 68 |

Sawyer arm with an attached table leg needs to avoid other legs to reach a hole of the table, and insert the pole to assemble the table.


69 |
70 |
71 | 74 |
SAC
75 |
76 |
77 | 80 |
SAC Large
81 |
82 |
83 | 86 |
MoPA-SAC (Ours)
87 |
88 |
89 | 90 | ---- 91 | 92 | ## Quantitative results 93 | 94 | 95 | 96 |
97 |
98 |
99 |

Success rates of our MoPA-SAC (green) and several baselines averaged over 4 seeds. Our approach can leverage the motion planner to converge with fewer environment steps than thebaseline. Both SAC and ours are trained for the same number of environment steps.

100 | 101 |
102 |
103 |
104 | 105 | ---- 106 | 107 | ## Citation 108 | ``` 109 | @inproceedings{yamada2020mopa, 110 | title={Motion Planner Augmented Reinforcement Learning for Obstructed Environments}, 111 | author={Jun Yamada and Youngwoon Lee and Gautam Salhotra and Karl Pertsch and Max Pflueger and Gaurav S. Sukhatme and Joseph J. Lim and Peter Englert}, 112 | booktitle={Conference on Robot Learning}, 113 | year={2020}, 114 | } 115 | ``` 116 | -------------------------------------------------------------------------------- /docs/img/2D_push.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/2D_push.png -------------------------------------------------------------------------------- /docs/img/clvrbanner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/clvrbanner.png -------------------------------------------------------------------------------- /docs/img/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/favicon-32x32.png -------------------------------------------------------------------------------- /docs/img/method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/method.png -------------------------------------------------------------------------------- /docs/img/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/result.png 
-------------------------------------------------------------------------------- /docs/img/sawyer_assembly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/sawyer_assembly.png -------------------------------------------------------------------------------- /docs/img/sawyer_lift.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/sawyer_lift.png -------------------------------------------------------------------------------- /docs/img/sawyer_push.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/sawyer_push.png -------------------------------------------------------------------------------- /docs/img/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/teaser.png -------------------------------------------------------------------------------- /docs/js/figure-extension.js: -------------------------------------------------------------------------------- 1 | (function (extension) { 2 | if (typeof showdown !== 'undefined') { 3 | // global (browser or nodejs global) 4 | extension(showdown); 5 | } else if (typeof define === 'function' && define.amd) { 6 | // AMD 7 | define(['showdown'], extension); 8 | } else if (typeof exports === 'object') { 9 | // Node, CommonJS-like 10 | module.exports = extension(require('showdown')); 11 | } else { 12 | // showdown was not found so we throw 13 | throw Error('Could not find showdown library'); 14 | } 15 | }(function (showdown) { 16 | 17 | var fig = '
' + '%2' + '
%3
' + '
'; 18 | var imgRegex = /(?:

)?(?:<\/p>)?/gi; 19 | 20 | // loading extension into shodown 21 | showdown.extension('figure', function () { 22 | return [ 23 | { 24 | type: 'output', 25 | filter: function (text, converter, options) { 26 | var tag = fig; 27 | 28 | return text.replace(imgRegex, function (match, url, alt, rest) { 29 | return tag.replace('%1', url).replace('%2', alt).replace('%3', alt).replace('%4', alt); 30 | }); 31 | } 32 | } 33 | ]; 34 | }); 35 | })); -------------------------------------------------------------------------------- /docs/video/push_baseline.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/push_baseline.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_assembly_baseline.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_assembly_baseline.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_assembly_baseline_lg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_assembly_baseline_lg.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_assembly_mopa.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_assembly_mopa.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_lift_baseline.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_lift_baseline.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_lift_baseline_lg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_lift_baseline_lg.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_lift_mopa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_lift_mopa.gif -------------------------------------------------------------------------------- /docs/video/sawyer_lift_mopa.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_lift_mopa.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_push_baseline.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_push_baseline.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_push_baseline_lg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_push_baseline_lg.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_push_mopa.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_push_mopa.gif -------------------------------------------------------------------------------- /docs/video/sawyer_push_mopa.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_push_mopa.mp4 -------------------------------------------------------------------------------- /docs/video/teaser.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/teaser.gif -------------------------------------------------------------------------------- /env/__init__.py: -------------------------------------------------------------------------------- 1 | """ Define all environments. """ 2 | 3 | from gym.envs.registration import register 4 | 5 | 6 | # register all environments to use 7 | register( 8 | id="PusherObstacle-v0", 9 | entry_point="env.pusher:PusherObstacleEnv", 10 | kwargs={}, 11 | ) 12 | 13 | register(id="SawyerAssembly-v0", entry_point="env.sawyer:SawyerAssemblyEnv", kwargs={}) 14 | register( 15 | id="SawyerAssemblyObstacle-v0", 16 | entry_point="env.sawyer:SawyerAssemblyObstacleEnv", 17 | kwargs={}, 18 | ) 19 | 20 | register(id="SawyerLift-v0", entry_point="env.sawyer:SawyerLiftEnv", kwargs={}) 21 | register( 22 | id="SawyerLiftObstacle-v0", 23 | entry_point="env.sawyer:SawyerLiftObstacleEnv", 24 | kwargs={}, 25 | ) 26 | 27 | register(id="SawyerPush-v0", entry_point="env.sawyer:SawyerPushEnv", kwargs={}) 28 | register( 29 | id="SawyerPushObstacle-v0", 30 | entry_point="env.sawyer:SawyerPushObstacleEnv", 31 | kwargs={}, 32 | ) 33 | -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/base.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/base.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/head.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/head.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l0.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l0.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l1.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l2.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l3.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l4.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l4.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l5.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l6.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/pedestal.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/pedestal.stl -------------------------------------------------------------------------------- /env/assets/meshes/toy_table/0.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/0.stl -------------------------------------------------------------------------------- /env/assets/meshes/toy_table/1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/1.stl -------------------------------------------------------------------------------- /env/assets/meshes/toy_table/2.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/2.stl -------------------------------------------------------------------------------- /env/assets/meshes/toy_table/3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/3.stl -------------------------------------------------------------------------------- /env/assets/meshes/toy_table/4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/4.stl -------------------------------------------------------------------------------- /env/assets/meshes/two_finger_gripper/electric_gripper_base.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/two_finger_gripper/electric_gripper_base.STL -------------------------------------------------------------------------------- /env/assets/meshes/two_finger_gripper/half_round_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/two_finger_gripper/half_round_tip.STL -------------------------------------------------------------------------------- /env/assets/meshes/two_finger_gripper/standard_narrow.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/two_finger_gripper/standard_narrow.STL -------------------------------------------------------------------------------- /env/assets/objects/can-visual.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /env/assets/objects/can.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /env/assets/objects/meshes/can.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/objects/meshes/can.stl -------------------------------------------------------------------------------- /env/assets/textures/can.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/textures/can.png -------------------------------------------------------------------------------- /env/assets/textures/dark-wood.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/textures/dark-wood.png -------------------------------------------------------------------------------- /env/assets/textures/grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/textures/grid.png -------------------------------------------------------------------------------- /env/assets/textures/light-wood.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/textures/light-wood.png -------------------------------------------------------------------------------- /env/assets/xml/common/basic_scene.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 20 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /env/assets/xml/common/camera.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_assembly_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_indicator_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_pick_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 
33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_pick_indicator_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_pick_pos_act.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_pick_target_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_pos_act.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_target_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /env/assets/xml/common/materials.xml: 
-------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /env/assets/xml/common/sawyer.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /env/assets/xml/common/sawyer_assembly.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /env/assets/xml/common/sawyer_dependencies.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 84 | -------------------------------------------------------------------------------- /env/assets/xml/common/sawyer_joint_pos_act.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /env/assets/xml/common/sawyer_no_gripper.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /env/assets/xml/common/sawyer_pick.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 
-------------------------------------------------------------------------------- /env/assets/xml/common/skybox.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /env/assets/xml/common/target.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /env/assets/xml/common/visual.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /env/assets/xml/pusher_obstacle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer_assembly.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer_assembly_obstacle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 
18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer_lift.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer_lift_obstacle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer_push.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer_push_obstacle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 
import numpy as np

from env.sawyer.sawyer import SawyerEnv


class SawyerAssemblyEnv(SawyerEnv):
    """Sawyer peg-in-hole assembly task (variant without extra obstacles).

    The arm must bring the peg head (site ``pegHead``) into the hole on the
    toy-table furniture; success fires when the peg head is within 4 cm of
    the ``hole_bottom`` site.  The scene is loaded from
    ``sawyer_assembly.xml`` and rendered from the overhead camera.
    """

    def __init__(self, **kwargs):
        # Force the overhead camera before the base class parses kwargs,
        # then let SawyerEnv load the assembly scene.
        kwargs["camera_name"] = "topview"
        super().__init__("sawyer_assembly.xml", **kwargs)
        self._get_reference()

    def _get_reference(self):
        # No task-specific references beyond what the base class caches.
        super()._get_reference()

    @property
    def dof(self):
        # 7 arm joints; this task exposes no gripper degrees of freedom.
        return 7

    @property
    def init_qpos(self):
        # Nominal arm configuration; _reset() perturbs around this mean.
        return np.array([0.427, 0.13, 0.0557, 0.114, -0.0622, 0.0276, 0.00356])

    def _reset(self):
        """Reset arm joints to init_qpos plus N(0, 0.02^2) noise, zero velocities.

        Returns the observation dict from :meth:`_get_obs`.
        """
        init_qpos = (
            self.init_qpos + self.np_random.randn(self.init_qpos.shape[0]) * 0.02
        )
        self.sim.data.qpos[self.ref_joint_pos_indexes] = init_qpos
        self.sim.data.qvel[self.ref_joint_vel_indexes] = 0.0
        self.sim.forward()

        return self._get_obs()

    def compute_reward(self, action):
        """Compute the assembly reward for the current sim state.

        Two modes, selected by ``self._kwargs["reward_type"]``:
        * ``"dense"``: tanh-shaped penalty on peg-head-to-hole distance.
        * otherwise (sparse/staged): a shaped bonus only once the peg head
          is within 0.3 of the hole, growing as it nears the hole bottom.

        The success bonus (``success_reward``) fires in either mode when the
        peg head is within 0.04 of ``hole_bottom``; it also sets
        ``self._success`` and ``self._terminal``.

        Returns:
            (reward, info) — ``info`` carries ``reward_reach`` only in the
            dense branch.
        """
        info = {}
        reward = 0
        reward_type = self._kwargs["reward_type"]
        pegHeadPos = self.sim.data.get_site_xpos("pegHead")
        hole = self.sim.data.get_site_xpos("hole")
        dist = np.linalg.norm(pegHeadPos - hole)
        hole_bottom = self.sim.data.get_site_xpos("hole_bottom")
        dist_to_hole_bottom = np.linalg.norm(pegHeadPos - hole_bottom)
        if reward_type == "dense":
            reward_reach = np.tanh(-1.5 * dist)
            reward += reward_reach
            info = dict(reward_reach=reward_reach)
        else:
            reward_reach = 0
            if dist < 0.3:
                # Shaping switched from hole-mouth to hole-bottom distance:
                # reward_reach += 0.4 * (1-np.tanh(15*dist))
                reward_reach += 0.4 * (1 - np.tanh(15 * dist_to_hole_bottom))
            reward += reward_reach
        # NOTE(review): source was recovered from a flattened dump; the
        # success check is placed at method level (both reward modes) —
        # confirm against upstream if dense-mode termination matters.
        if dist_to_hole_bottom < 0.04:
            reward += self._kwargs["success_reward"]
            self._success = True
            self._terminal = True

        return reward, info

    def _get_obs(self):
        """Extend the base observation with peg/hole site positions and peg quat."""
        di = super()._get_obs()
        di["hole"] = self.sim.data.get_site_xpos("hole")
        di["pegHead"] = self.sim.data.get_site_xpos("pegHead")
        di["pegEnd"] = self.sim.data.get_site_xpos("pegEnd")
        di["peg_quat"] = self._get_quat("peg")
        return di

    @property
    def static_bodies(self):
        # Bodies treated as immovable for collision checking: the table and
        # every furniture part.
        return ["table", "furniture", "0_part0", "1_part1", "4_part4", "2_part2"]

    @property
    def static_geoms(self):
        return []

    @property
    def static_geom_ids(self):
        """Geom ids belonging to any body listed in ``static_bodies``."""
        body_ids = []
        for body_name in self.static_bodies:
            body_ids.append(self.sim.model.body_name2id(body_name))

        geom_ids = []
        for geom_id, body_id in enumerate(self.sim.model.geom_bodyid):
            if body_id in body_ids:
                geom_ids.append(geom_id)
        return geom_ids

    def _step(self, action, is_planner=False):
        """Advance the simulation one environment step.

        ``action`` is a delta on the arm joint positions.  Planner actions
        are already in joint units and are only clipped; policy actions are
        scaled by ``self._ac_scale`` first.  The inner loop runs the MuJoCo
        simulation at its native dt while applying gravity compensation
        (``qfrc_bias`` copied into ``qfrc_applied``) to the real arm and to
        the indicator arms when present.

        Returns:
            (obs, reward, done, info)
        """
        assert len(action) == self.dof, "environment got invalid action dimension"

        # Planner steps chain from the previously commanded state; policy
        # steps (or the first step) re-anchor on the measured joint positions.
        if not is_planner or self._prev_state is None:
            self._prev_state = self.sim.data.qpos[self.ref_joint_pos_indexes].copy()

        if self._i_term is None:
            # NOTE(review): np.zeros_like on a scalar dof yields a 0-d array;
            # presumably np.zeros(dof) was intended — _i_term is not used
            # below, so this is harmless here. Confirm against the base class.
            self._i_term = np.zeros_like(self.mujoco_robot.dof)

        if is_planner:
            rescaled_ac = np.clip(
                action[: self.robot_dof], -self._ac_scale, self._ac_scale
            )
        else:
            rescaled_ac = np.clip(
                action[: self.robot_dof] * self._ac_scale,
                -self._ac_scale,
                self._ac_scale,
            )
        desired_state = self._prev_state + rescaled_ac

        # One env step spans several physics steps.
        n_inner_loop = int(self._frame_dt / self.dt)
        for _ in range(n_inner_loop):
            # Gravity compensation: cancel bias forces on the controlled arm.
            self.sim.data.qfrc_applied[
                self.ref_joint_vel_indexes
            ] = self.sim.data.qfrc_bias[self.ref_joint_vel_indexes].copy()

            if self.use_robot_indicator:
                self.sim.data.qfrc_applied[
                    self.ref_indicator_joint_pos_indexes
                ] = self.sim.data.qfrc_bias[self.ref_indicator_joint_pos_indexes].copy()

            if self.use_target_robot_indicator:
                self.sim.data.qfrc_applied[
                    self.ref_target_indicator_joint_pos_indexes
                ] = self.sim.data.qfrc_bias[
                    self.ref_target_indicator_joint_pos_indexes
                ].copy()
            self._do_simulation(desired_state)

        self._prev_state = np.copy(desired_state)
        reward, info = self.compute_reward(action)

        return self._get_obs(), reward, self._terminal, info
    @property
    def dof(self):
        # 7 arm joints; this task exposes no gripper degrees of freedom.
        return 7

    @property
    def init_qpos(self):
        # Nominal arm configuration; _reset() perturbs around this mean.
        return np.array([0.427, 0.13, 0.0557, 0.114, -0.0622, 0.0276, 0.00356])

    def _reset(self):
        """Reset arm joints to init_qpos plus N(0, 0.02^2) noise, zero velocities.

        Returns the observation dict from :meth:`_get_obs`.
        """
        init_qpos = (
            self.init_qpos + self.np_random.randn(self.init_qpos.shape[0]) * 0.02
        )
        self.sim.data.qpos[self.ref_joint_pos_indexes] = init_qpos
        self.sim.data.qvel[self.ref_joint_vel_indexes] = 0.0
        self.sim.forward()

        return self._get_obs()

    def compute_reward(self, action):
        """Compute the staged assembly reward for the obstacle variant.

        Shaped reach bonus once the peg head is within 0.3 of the hole,
        growing as it nears the hole mouth; the success bonus fires when
        the peg head is within 0.025 of ``hole_bottom`` (tighter than the
        0.04 used by the obstacle-free variant) and sets ``self._success``
        and ``self._terminal``.

        Returns:
            (reward, info) — ``info`` is always empty here.
        """
        info = {}
        reward = 0
        # NOTE(review): reward_type is read but never used — this variant
        # only implements the staged/sparse reward.
        reward_type = self._kwargs["reward_type"]
        pegHeadPos = self.sim.data.get_site_xpos("pegHead")
        hole = self.sim.data.get_site_xpos("hole")
        dist = np.linalg.norm(pegHeadPos - hole)
        hole_bottom = self.sim.data.get_site_xpos("hole_bottom")
        dist_to_hole_bottom = np.linalg.norm(pegHeadPos - hole_bottom)
        # dist_to_hole duplicates dist above (same site pair).
        dist_to_hole = np.linalg.norm(pegHeadPos - hole)
        reward_reach = 0
        if dist < 0.3:
            reward_reach += 0.4 * (1 - np.tanh(15 * dist_to_hole))
        reward += reward_reach
        if dist_to_hole_bottom < 0.025:
            reward += self._kwargs["success_reward"]
            self._success = True
            self._terminal = True

        return reward, info

    def _get_obs(self):
        """Extend the base observation with peg/hole site positions and peg quat."""
        di = super()._get_obs()
        di["hole"] = self.sim.data.get_site_xpos("hole")
        di["pegHead"] = self.sim.data.get_site_xpos("pegHead")
        di["pegEnd"] = self.sim.data.get_site_xpos("pegEnd")
        di["peg_quat"] = self._get_quat("peg")
        return di

    @property
    def static_bodies(self):
        # Only the table is immovable here; the furniture parts are treated
        # as manipulation bodies (see below), unlike the obstacle-free task.
        return ["table"]

    @property
    def manipulation_bodies(self):
        # Furniture parts the planner must reason about as task objects.
        return ["furniture", "0_part0", "1_part1", "4_part4", "2_part2"]

    @property
    def manipulation_geom_ids(self):
        """Geom ids belonging to any body listed in ``manipulation_bodies``."""
        body_ids = []
        for body_name in self.manipulation_bodies:
            body_ids.append(self.sim.model.body_name2id(body_name))

        geom_ids = []
        for geom_id, body_id in enumerate(self.sim.model.geom_bodyid):
            if body_id in body_ids:
                geom_ids.append(geom_id)
        return geom_ids

    @property
    def static_geoms(self):
        return []

    @property
    def static_geom_ids(self):
        """Geom ids belonging to any body listed in ``static_bodies``."""
        body_ids = []
        for body_name in self.static_bodies:
            body_ids.append(self.sim.model.body_name2id(body_name))

        geom_ids = []
        for geom_id, body_id in enumerate(self.sim.model.geom_bodyid):
            if body_id in body_ids:
                geom_ids.append(geom_id)
        return geom_ids

    def _step(self, action, is_planner=False):
        """Advance the simulation one environment step.

        ``action`` is a delta on the arm joint positions.  Planner actions
        are already in joint units and are only clipped; policy actions are
        scaled by ``self._ac_scale`` first.  The inner loop runs the MuJoCo
        simulation at its native dt while applying gravity compensation
        (``qfrc_bias`` copied into ``qfrc_applied``) to the real arm and to
        the indicator arms when present.

        Returns:
            (obs, reward, done, info)
        """
        assert len(action) == self.dof, "environment got invalid action dimension"

        # Planner steps chain from the previously commanded state; policy
        # steps (or the first step) re-anchor on the measured joint positions.
        if not is_planner or self._prev_state is None:
            self._prev_state = self.sim.data.qpos[self.ref_joint_pos_indexes].copy()

        if self._i_term is None:
            # NOTE(review): np.zeros_like on a scalar dof yields a 0-d array;
            # presumably np.zeros(dof) was intended — _i_term is not used
            # below, so this is harmless here. Confirm against the base class.
            self._i_term = np.zeros_like(self.mujoco_robot.dof)

        if is_planner:
            rescaled_ac = np.clip(
                action[: self.robot_dof], -self._ac_scale, self._ac_scale
            )
        else:
            rescaled_ac = np.clip(
                action[: self.robot_dof] * self._ac_scale,
                -self._ac_scale,
                self._ac_scale,
            )
        desired_state = self._prev_state + rescaled_ac

        # One env step spans several physics steps.
        n_inner_loop = int(self._frame_dt / self.dt)
        for _ in range(n_inner_loop):
            # Gravity compensation: cancel bias forces on the controlled arm.
            self.sim.data.qfrc_applied[
                self.ref_joint_vel_indexes
            ] = self.sim.data.qfrc_bias[self.ref_joint_vel_indexes].copy()

            if self.use_robot_indicator:
                self.sim.data.qfrc_applied[
                    self.ref_indicator_joint_pos_indexes
                ] = self.sim.data.qfrc_bias[self.ref_indicator_joint_pos_indexes].copy()

            if self.use_target_robot_indicator:
                self.sim.data.qfrc_applied[
                    self.ref_target_indicator_joint_pos_indexes
                ] = self.sim.data.qfrc_bias[
                    self.ref_target_indicator_joint_pos_indexes
                ].copy()
            self._do_simulation(desired_state)

        self._prev_state = np.copy(desired_state)
        reward, info = self.compute_reward(action)

        return self._get_obs(), reward, self._terminal, info
KinematicPlanner(std::string xml_filename, std::string algo, int num_actions, std::string opt, double threshold, double _range, std::vector passive_joint_idx, std::vector Glue_bodies, std::vector> ignored_contacts, double contact_threshold, double goal_bias, bool is_simplified, double simplified_duration, int seed); 67 | ~KinematicPlanner(); 68 | std::vector > plan(std::vector start_vec, std::vector goal_vec, double timelimit); 69 | bool isValidState(std::vector state_vec); 70 | std::string getPlannerStatus(); 71 | }; 72 | } 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /motion_planners/include/c_planner.pxd.bak: -------------------------------------------------------------------------------- 1 | from libcpp.string cimport string 2 | from libcpp.vector cimport vector 3 | 4 | cdef extern from "Planner.h" namespace "MotionPlanner": 5 | cdef cppclass Planner: 6 | Planner(string) 7 | string xml_filename 8 | int planning(vector[double], vector[double], double) 9 | -------------------------------------------------------------------------------- /motion_planners/include/compound_state_projector.h: -------------------------------------------------------------------------------- 1 | /// Provide a state projection for compound states. 2 | /// This only uses the elements of the state that are expressed as a vector 3 | /// of doubles. 
4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | class CompoundStateProjector 15 | : public ompl::base::RealVectorRandomLinearProjectionEvaluator { 16 | public: 17 | CompoundStateProjector(const ompl::base::CompoundStateSpace* space, 18 | std::shared_ptr real_space_, 19 | int dim_) 20 | : RealVectorRandomLinearProjectionEvaluator(real_space_, dim_), 21 | real_space(real_space_), 22 | dim(dim_) { 23 | } 24 | 25 | CompoundStateProjector(const ompl::base::CompoundStateSpace* space, 26 | std::shared_ptr real_space_, 27 | std::vector cell_sizes) 28 | : RealVectorRandomLinearProjectionEvaluator(real_space_, cell_sizes), 29 | real_space(real_space_), 30 | dim(cell_sizes.size()) {} 31 | 32 | static std::shared_ptr makeCompoundStateProjector( 33 | const ompl::base::CompoundStateSpace* space) 34 | { 35 | auto real_space = std::make_shared( 36 | space->getDimension()); 37 | // real_space->setBounds(-3.14, 3.14); 38 | // real_space->setBounds(-1, 1); 39 | int dim = 3; 40 | std::vector cell_sizes(dim, 0.1); 41 | auto csp(std::make_shared( 42 | space, real_space, cell_sizes)); 43 | return csp; 44 | } 45 | 46 | unsigned int getDimension() const override { 47 | return dim; 48 | } 49 | 50 | void project( 51 | const ompl::base::State* state, 52 | Eigen::Ref< Eigen::VectorXd > projection) const override 53 | { 54 | auto rv_state = getRealVectorState(state); 55 | 56 | // Use a real space projection 57 | ompl::base::RealVectorRandomLinearProjectionEvaluator::project( 58 | rv_state, projection); 59 | 60 | // Cleanup 61 | real_space->freeState(rv_state); 62 | } 63 | 64 | 65 | private: 66 | ompl::base::State* getRealVectorState(const ompl::base::State* state) const { 67 | // Create a real vector state 68 | std::vector reals; 69 | real_space->copyToReals(reals, state); 70 | auto rv_state = real_space->allocState(); 71 | for(size_t i=0; i < reals.size(); i++) { 72 | rv_state->as() 73 | ->values[i] = reals[i]; 74 | } 75 | 76 | return 
rv_state; 77 | } 78 | 79 | std::shared_ptr real_space; 80 | const int dim; 81 | }; 82 | -------------------------------------------------------------------------------- /motion_planners/include/mujoco_wrapper.h: -------------------------------------------------------------------------------- 1 | // Wrap MuJoCo functionality to make it available to OMPL API 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "mujoco.h" 11 | 12 | struct JointInfo { 13 | std::string name; 14 | int type; 15 | bool limited; 16 | mjtNum range[2]; 17 | int qposadr; 18 | int dofadr; 19 | }; 20 | 21 | std::ostream& operator<<(std::ostream& os, const JointInfo& ji); 22 | 23 | std::vector getJointInfo(const mjModel* m); 24 | 25 | struct StateRange { 26 | bool limited; 27 | mjtNum range[2]; 28 | }; 29 | 30 | StateRange getCtrlRange(const mjModel* m, size_t i); 31 | 32 | struct MuJoCoState { 33 | mjtNum time; 34 | std::vector qpos; 35 | std::vector qvel; 36 | std::vector act; 37 | std::vector ctrl; 38 | }; 39 | 40 | std::ostream& operator<<(std::ostream& os, const MuJoCoState& s); 41 | 42 | 43 | // Put some sanity on the MuJoCo API 44 | class MuJoCo { 45 | public: 46 | MuJoCo(std::string mjkey_filename): 47 | m(0), d(0) { 48 | // mj_activate and mj_deactivate should only be called 49 | // once per program 50 | // mj_instance_count_lock.lock(); 51 | //if (mj_instance_count == 0) { 52 | mj_activate(mjkey_filename.c_str()); 53 | //} 54 | //mj_instance_count += 1; 55 | //mj_instance_count_lock.unlock(); 56 | } 57 | 58 | ~MuJoCo() { 59 | if (d) mj_deleteData(d); 60 | if (m) mj_deleteModel(m); 61 | mj_instance_count_lock.lock(); 62 | mj_instance_count -= 1; 63 | if (mj_instance_count == 0) { 64 | mj_deactivate(); 65 | } 66 | mj_instance_count_lock.unlock(); 67 | } 68 | 69 | // TODO: copy constructor 70 | // TODO: assignment operator 71 | 72 | bool loadXML(std::string filename) { 73 | if (m) mj_deleteModel(m); 74 | char error[1000]; 75 | m = 
mj_loadXML(filename.c_str(), 0, error, 1000); 76 | if (!m) { 77 | std::cerr << error << std::endl; 78 | } 79 | max_timestep = m->opt.timestep; 80 | return m; 81 | } 82 | 83 | bool makeData() { 84 | if (!m) { 85 | std::cerr << "Cannot makeData without a model!" << std::endl; 86 | return false; 87 | } 88 | if (d) mj_deleteData(d); 89 | d = mj_makeData(m); 90 | return d; 91 | } 92 | 93 | 94 | 95 | std::string getJointName(int i) const { 96 | // Avert your eyes of this horror 97 | return std::string(m->names + m->name_jntadr[i]); 98 | } 99 | 100 | std::string getBodyName(int i) const { 101 | return std::string(m->names + m->name_bodyadr[i]); 102 | } 103 | 104 | std::string getActName(int i) const { 105 | return std::string(m->names + m->name_actuatoradr[i]); 106 | } 107 | 108 | /// Set the world to random state within specified limits 109 | /// modifies d->qpos and d->qvel 110 | void setRandomState() { 111 | mj_resetData(m, d); 112 | // Set default states 113 | for (size_t i=0; i < m->nq; i++) { 114 | d->qpos[i] = m->qpos0[i]; 115 | } 116 | 117 | // Set random states within joint limit for DoFs 118 | auto joints = getJointInfo(m); 119 | for (size_t i=0; i < m->nv; i++) { 120 | int joint_id = m->dof_jntid[i]; 121 | int qposadr = m->jnt_qposadr[ joint_id ]; 122 | 123 | mjtNum r = ((mjtNum) rand()) / ((mjtNum) RAND_MAX); 124 | auto lower = joints[joint_id].range[0]; 125 | auto upper = joints[joint_id].range[1]; 126 | if (!joints[joint_id].limited) { 127 | // set range to -pi to pi 128 | lower = -3.1416; 129 | upper = 3.1416; 130 | } 131 | d->qpos[qposadr] = (r * (upper - lower)) + lower; 132 | 133 | // velocity = 0 seem reasonable 134 | d->qvel[i] = 0; 135 | } 136 | } 137 | 138 | void setState(MuJoCoState s) { 139 | d->time = s.time; 140 | for(size_t i=0; i < m->nq; i++) { 141 | if (i >= s.qpos.size()) break; 142 | d->qpos[i] = s.qpos[i]; 143 | } 144 | for(size_t i=0; i < m->nv; i++) { 145 | if (i >= s.qvel.size()) break; 146 | d->qvel[i] = s.qvel[i]; 147 | } 148 | 
for(size_t i=0; i < m->na; i++) { 149 | if (i >= s.act.size()) break; 150 | d->act[i] = s.act[i]; 151 | } 152 | for(size_t i=0; i < m->nu; i++) { 153 | if (i >= s.ctrl.size()) break; 154 | d->ctrl[i] = s.ctrl[i]; 155 | } 156 | } 157 | 158 | MuJoCoState getState() const { 159 | MuJoCoState s; 160 | s.time = d->time; 161 | for(size_t i=0; i < m->nq; i++) { 162 | s.qpos.push_back(d->qpos[i]); 163 | } 164 | for(size_t i=0; i < m->nv; i++) { 165 | s.qvel.push_back(d->qvel[i]); 166 | } 167 | for(size_t i=0; i < m->na; i++) { 168 | s.act.push_back(d->act[i]); 169 | } 170 | for(size_t i=0; i < m->nu; i++) { 171 | s.ctrl.push_back(d->ctrl[i]); 172 | } 173 | return s; 174 | } 175 | 176 | void step() { 177 | mj_step(m, d); 178 | } 179 | 180 | void sim_duration(double duration) { 181 | int steps = ceil(duration / max_timestep); 182 | m->opt.timestep = duration / steps; 183 | for(int i=0; i < steps; i++) { 184 | mj_step(m, d); 185 | } 186 | } 187 | 188 | double getMaxTimestep() const { 189 | return max_timestep; 190 | } 191 | 192 | mjModel* m; 193 | mjData* d; 194 | 195 | private: 196 | double max_timestep; 197 | static int mj_instance_count; 198 | static std::mutex mj_instance_count_lock; 199 | }; 200 | 201 | -------------------------------------------------------------------------------- /motion_planners/include/plan.pxd.bak: -------------------------------------------------------------------------------- 1 | cimport c_planner 2 | from c_planner cimport Planner as CPlanner 3 | 4 | cdef class Planner: 5 | cdef c_planner.Planner *thisptr 6 | cpdef planning(self, start_vec, goal_vec, timelimit) 7 | -------------------------------------------------------------------------------- /motion_planners/planner.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | # distutils: sources = KinematicPlanner.cpp 3 | 4 | from libcpp.string cimport string 5 | from libcpp cimport bool as bool_ 6 | from libcpp.vector cimport 
# C++ declaration of the OMPL-backed kinematic planner implemented in
# KinematicPlanner.cpp. Only the members accessed from Python are declared.
cdef extern from "KinematicPlanner.h" namespace "MotionPlanner":
    cdef cppclass KinematicPlanner:
        # except + converts C++ exceptions from the constructor into Python ones.
        KinematicPlanner(string, string, int, string, double, double, vector[int], vector[string],
                         vector[pair[int, int]], double, double, bool_, double, int) except +
        string xml_filename
        string opt
        int num_actions
        string algo
        double _range
        double threshold
        vector[int] passive_joint_idx
        vector[string] glue_bodies
        vector[pair[int, int]] ignored_contacts
        double contact_threshold
        string planner_status
        bool_ isSimplified
        double simplifiedDuration
        # plan(start, goal, timelimit) -> list of joint-space waypoints.
        vector[vector[double]] plan(vector[double], vector[double], double)
        bool_ isValidState(vector[double])
        string getPlannerStatus()
        int seed

cdef class PyKinematicPlanner:
    """Python-facing wrapper owning a heap-allocated KinematicPlanner.

    NOTE(review): sampling_based_planner.py calls planner.removeCollision(),
    which is not declared/exposed here — confirm against KinematicPlanner.h.
    """
    cdef KinematicPlanner *thisptr

    def __cinit__(self, string xml_filename, string algo, int num_actions, string opt, double threshold,
                  double _range, vector[int] passive_joint_idx, vector[string] glue_bodies,
                  vector[pair[int, int]] ignored_contacts, double contact_threshold, double goal_bias,
                  bool_ is_simplified, double simplified_duration, int seed):

        # Allocate the underlying C++ planner; freed in __dealloc__.
        self.thisptr = new KinematicPlanner(xml_filename, algo, num_actions, opt, threshold, _range,
                                            passive_joint_idx, glue_bodies, ignored_contacts, contact_threshold,
                                            goal_bias, is_simplified, simplified_duration, seed)

    def __dealloc__(self):
        del self.thisptr

    cpdef plan(self, start_vec, goal_vec, timelimit):
        # Returns the raw waypoint list from the C++ planner.
        return self.thisptr.plan(start_vec, goal_vec, timelimit)

    cpdef getPlannerStatus(self):
        # Status string (bytes) describing the last planning attempt.
        return self.thisptr.getPlannerStatus()

    cpdef isValidState(self, state_vec):
        # Collision/limit check for a single joint-space state.
        return self.thisptr.isValidState(state_vec)
class SamplingBasedPlanner:
    """Wrapper around the Cython/OMPL PyKinematicPlanner that handles
    conversion of non-limited (continuous-rotation) joints and unwraps
    planned trajectories so consecutive waypoints never jump across the
    +/-pi boundary."""

    def __init__(
        self,
        config,
        xml_path,
        num_actions,
        non_limited_idx,
        planner_type=None,
        passive_joint_idx=None,
        glue_bodies=None,
        ignored_contacts=None,
        contact_threshold=0.0,
        goal_bias=0.05,
        is_simplified=False,
        simplified_duration=0.1,
        range_=None,
    ):
        """Build the underlying PyKinematicPlanner.

        Bug fix: the list parameters previously defaulted to `[]` (mutable
        default arguments shared across all instances); they now default to
        None and are replaced with fresh lists per call.
        """
        if passive_joint_idx is None:
            passive_joint_idx = []
        if glue_bodies is None:
            glue_bodies = []
        if ignored_contacts is None:
            ignored_contacts = []

        self.config = config
        if planner_type is None:
            planner_type = config.planner_type
        if range_ is None:
            range_ = config.range
        self.planner = PyKinematicPlanner(
            xml_path.encode("utf-8"),
            planner_type.encode("utf-8"),
            num_actions,
            config.planner_objective.encode("utf-8"),
            config.threshold,
            range_,
            passive_joint_idx,
            glue_bodies,
            ignored_contacts,
            contact_threshold,
            goal_bias,
            is_simplified,
            simplified_duration,
            config.seed,
        )
        self.non_limited_idx = non_limited_idx

    def convert_nonlimited(self, state):
        """Wrap the non-limited joint entries of `state` in place via
        util.env.joint_convert and return the state."""
        if self.non_limited_idx is not None:
            for idx in self.non_limited_idx:
                state[idx] = joint_convert(state[idx])
        return state

    def isValidState(self, state):
        """Return whether `state` is valid according to the C++ planner."""
        return self.planner.isValidState(state)

    def plan(self, start, goal, timelimit=1.0):
        """Plan from `start` to `goal` within `timelimit` seconds.

        Returns (unwrapped_traj, raw_states, valid_state, exact). The planner
        signals failure with a constant array: -5 means the start/goal state
        was invalid, -4 means only an approximate solution was found.
        """
        valid_state = True
        exact = True
        converted_start = self.convert_nonlimited(start.copy())
        converted_goal = self.convert_nonlimited(goal.copy())
        states = np.array(self.planner.plan(converted_start, converted_goal, timelimit))

        if np.unique(states).size == 1:
            if states[0][0] == -5:
                valid_state = False
            if states[0][0] == -4:
                exact = False
            return states, states, valid_state, exact

        # Re-anchor the trajectory at the (unconverted) start state and undo
        # the +/-pi wrap-around the planner applies to non-limited joints, so
        # the returned trajectory is continuous.
        traj = [start]
        pre_state = states[0]
        for state in states[1:]:
            tmp_state = traj[-1] + (state - pre_state)
            if self.non_limited_idx is not None:
                for idx in self.non_limited_idx:
                    # A jump larger than ~pi means the planner crossed the
                    # +/-pi seam; take the short way around instead.
                    if abs(state[idx] - pre_state[idx]) > 3.14:
                        if pre_state[idx] > 0 and state[idx] <= 0:
                            tmp_state[idx] = traj[-1][idx] + (
                                3.14 - pre_state[idx] + state[idx] + 3.14
                            )
                        elif pre_state[idx] < 0 and state[idx] > 0:
                            tmp_state[idx] = traj[-1][idx] - (
                                3.14 - state[idx] + pre_state[idx] + 3.14
                            )
            pre_state = state
            traj.append(tmp_state)
        return np.array(traj), states, valid_state, exact

    def remove_collision(self, geom_id, contype, conaffinity):
        """Forward a collision-masking request to the C++ planner.

        NOTE(review): removeCollision is not declared on PyKinematicPlanner in
        planner.pyx as shown — confirm it exists before calling this.
        """
        self.planner.removeCollision(geom_id, contype, conaffinity)

    def get_planner_status(self):
        """Return the planner status as a Python string."""
        return self.planner.getPlannerStatus().decode("utf-8")
from setuptools import setup, find_packages, Extension
from Cython.Build import cythonize
from distutils.sysconfig import get_python_lib
import glob
import os
import sys
import platform

# export MACOSX_DEPLOYMENT_TARGET=10.13

prefix_path = os.environ['HOME']
# Bug fix: os.path.join(prefix_path, '/usr/local/lib/libompl.dylib') silently
# discarded prefix_path, because os.path.join drops earlier components when a
# later one is absolute. Spell the absolute path directly (same result,
# without the misleading join).
ompl_lib_path = '/usr/local/lib/libompl.dylib'
eigen_include_path = '/usr/local/include/eigen3'

# Builds the `planner` extension: Cython wrapper + OMPL/MuJoCo glue code.
extensions = [
    Extension('planner', ['planner.pyx', 'KinematicPlanner.cpp', './src/mujoco_ompl_interface.cpp', './src/mujoco_wrapper.cpp',
                          ],
              include_dirs=["./include/", '/opt/local/include', eigen_include_path, './3rd_party/include/', '/opt/local/include/boost/',
                            os.path.join(prefix_path, '.mujoco/mujoco200/include/'), '/usr/local/include/ompl', '/usr/local/include'],
              extra_objects=[ompl_lib_path, os.path.join(prefix_path, '.mujoco/mujoco200/bin/libmujoco200.dylib')],
              extra_compile_args=['-std=c++11', '-stdlib=libc++'],
              language="c++"),
]
setup(
    name='mujoco-ompl',
    ext_modules=cythonize(extensions),
)
"compound_state_projector.h" 2 | -------------------------------------------------------------------------------- /motion_planners/src/mujoco_wrapper.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "mujoco_wrapper.h" 3 | 4 | using namespace std; 5 | 6 | 7 | ////////////////////////////// 8 | // Init static variables 9 | ////////////////////////////// 10 | 11 | int MuJoCo::mj_instance_count=0; 12 | std::mutex MuJoCo::mj_instance_count_lock; 13 | 14 | ////////////////////////////// 15 | // Define functions 16 | ////////////////////////////// 17 | 18 | std::ostream& operator<<(std::ostream& os, const JointInfo& ji) { 19 | os << "Joint( name: \"" << ji.name << "\", " 20 | << "type: " << ji.type << ", " 21 | << "limited: " << ji.limited << ", " 22 | << "range: (" << ji.range[0] << ", " << ji.range[1] << ") " 23 | << ")"; 24 | return os; 25 | } 26 | 27 | std::ostream& operator<<(std::ostream& os, const MuJoCoState& s) { 28 | os << "{time: " << s.time << ", " 29 | << "qpos: ["; 30 | for(auto const& i : s.qpos) { 31 | os << i << ", "; 32 | } 33 | os << "] qvel: ["; 34 | for(auto const& i : s.qvel) { 35 | os << i << ", "; 36 | } 37 | os << "] act: ["; 38 | for(auto const& i : s.act) { 39 | os << i << ", "; 40 | } 41 | os << "] ctrl: ["; 42 | for(auto const& i : s.ctrl) { 43 | os << i << ", "; 44 | } 45 | os << "]}"; 46 | return os; 47 | } 48 | 49 | std::vector getJointInfo(const mjModel* m) { 50 | std::vector joints; 51 | for (size_t i=0; i < m->njnt; i++) { 52 | JointInfo joint; 53 | joint.name = std::string(m->names + m->name_jntadr[i]); 54 | joint.type = m->jnt_type[i]; 55 | joint.limited = (bool) m->jnt_limited[i]; 56 | joint.range[0] = m->jnt_range[2*i]; 57 | joint.range[1] = m->jnt_range[2*i + 1]; 58 | joint.qposadr = m->jnt_qposadr[i]; 59 | joint.dofadr = m->jnt_dofadr[i]; 60 | joints.push_back(joint); 61 | } 62 | return joints; 63 | } 64 | 65 | StateRange getCtrlRange(const mjModel* m, size_t i) { 66 | 
class BaseAgent(object):
    """Common interface shared by the RL agents (SAC, TD3, ...).

    Subclasses are expected to provide `_actor`, `_ac_space`, `_ob_norm`,
    and `_buffer`, and to implement the NotImplementedError methods.
    """

    def __init__(self, config, ob_space):
        self._config = config

    def normalize(self, ob):
        """Normalize an observation when `config.ob_norm` is enabled."""
        return self._ob_norm.normalize(ob) if self._config.ob_norm else ob

    def act(self, ob, is_train=True, return_stds=False, random_exploration=False):
        """Select an action for `ob`.

        Returns (ac, activation, stds) when exploring randomly or when
        `return_stds` is set, otherwise (ac, activation).
        """
        if random_exploration:
            sampled = self._ac_space.sample()
            # Discrete sub-actions are wrapped in a length-1 array to match
            # the shape the actor produces.
            for key, subspace in self._ac_space.spaces.items():
                if isinstance(subspace, spaces.Discrete):
                    sampled[key] = np.array([sampled[key]])
            return sampled, None, None

        # Delegate to the actor; it returns a 3-tuple iff return_stds is set.
        return self._actor.act(ob, is_train=is_train, return_stds=return_stds)

    def store_episode(self, rollouts):
        raise NotImplementedError()

    def replay_buffer(self):
        """Serializable snapshot of the replay buffer."""
        return self._buffer.state_dict()

    def load_replay_buffer(self, state_dict):
        """Restore the replay buffer from a snapshot."""
        self._buffer.load_state_dict(state_dict)

    def sync_networks(self):
        raise NotImplementedError()

    def train(self):
        raise NotImplementedError()

    def _soft_update_target_network(self, target, source, tau):
        # Polyak averaging; note the unusual convention here: tau weights the
        # *target* parameters, (1 - tau) weights the source.
        for tgt_param, src_param in zip(target.parameters(), source.parameters()):
            tgt_param.data.copy_(tau * tgt_param.data + (1 - tau) * src_param.data)

    def _copy_target_network(self, target, source):
        # Hard copy: target <- source, parameter by parameter.
        for tgt_param, src_param in zip(target.parameters(), source.parameters()):
            tgt_param.data.copy_(src_param.data)
class HERSampler:
    """Hindsight experience replay sampler.

    With the "future" strategy, each sampled transition's goal is replaced
    (with probability `future_p`) by an achieved goal from a later timestep of
    the same episode, and the reward is recomputed with `reward_func`.
    """

    def __init__(self, replay_strategy, replay_k, reward_func=None):
        self.replay_strategy = replay_strategy
        if self.replay_strategy == "future":
            # Bug fix: the original computed `1 - (1.0 / 1 + replay_k)`, which
            # due to operator precedence equals `-replay_k` — a negative
            # probability, so goals were never relabeled. The HER ratio is
            # 1 - 1/(1+k), e.g. k=4 -> 0.8.
            self.future_p = 1 - (1.0 / (1 + replay_k))
        else:
            self.future_p = 0
        self.reward_func = reward_func

    def sample_func(self, episode_batch, batch_size_in_transitions):
        """Sample `batch_size_in_transitions` transitions with HER relabeling.

        `episode_batch` maps keys (must include "ac", "ob", "ag", "g") to
        per-episode lists; returns a dict of stacked arrays including
        "ob_next" and recomputed rewards "r".
        """
        rollout_batch_size = len(episode_batch["ac"])
        batch_size = batch_size_in_transitions

        # select which rollouts and which timesteps to be used
        episode_idxs = np.random.randint(0, rollout_batch_size, batch_size)
        t_samples = [
            np.random.randint(len(episode_batch["ac"][episode_idx]))
            for episode_idx in episode_idxs
        ]

        transitions = {}
        for key in episode_batch.keys():
            transitions[key] = [
                episode_batch[key][episode_idx][t]
                for episode_idx, t in zip(episode_idxs, t_samples)
            ]

        # "ob" has one more entry than "ac", so t+1 is always valid.
        transitions["ob_next"] = [
            episode_batch["ob"][episode_idx][t + 1]
            for episode_idx, t in zip(episode_idxs, t_samples)
        ]
        transitions["r"] = np.zeros((batch_size,))

        # hindsight experience replay
        for i, (episode_idx, t) in enumerate(zip(episode_idxs, t_samples)):
            replace_goal = np.random.uniform() < self.future_p
            if replace_goal:
                future_t = np.random.randint(
                    t + 1, len(episode_batch["ac"][episode_idx]) + 1
                )
                future_ag = episode_batch["ag"][episode_idx][future_t]
                # Only relabel when the current achieved goal has not already
                # reached the future goal (negative reward = not reached).
                if (
                    self.reward_func(
                        episode_batch["ag"][episode_idx][t], future_ag, None
                    )
                    < 0
                ):
                    transitions["g"][i] = future_ag

            # Recompute the reward against the (possibly relabeled) goal.
            transitions["r"][i] = self.reward_func(
                episode_batch["ag"][episode_idx][t + 1], transitions["g"][i], None
            )

        # Stack into arrays; dict-valued entries are stacked per sub-key.
        new_transitions = {}
        for k, v in transitions.items():
            if isinstance(v[0], dict):
                sub_keys = v[0].keys()
                new_transitions[k] = {
                    sub_key: np.stack([v_[sub_key] for v_ in v]) for sub_key in sub_keys
                }
            else:
                new_transitions[k] = np.stack(v)

        return new_transitions
def run(config):
    """Configure this MPI worker (rank, seed, logging, device) and run the
    trainer in train or evaluation mode."""
    rank = MPI.COMM_WORLD.Get_rank()
    config.rank = rank
    config.is_chef = rank == 0
    # Each worker gets a distinct seed offset by its rank.
    config.seed = config.seed + rank
    config.num_workers = MPI.COMM_WORLD.Get_size()
    config.is_mpi = False if config.num_workers == 1 else True

    # Split torch's thread budget fairly across MPI workers.
    if torch.get_num_threads() != 1:
        fair_num_threads = max(
            int(torch.get_num_threads() / MPI.COMM_WORLD.Get_size()), 1
        )
        torch.set_num_threads(fair_num_threads)

    if config.is_chef:
        logger.warning("Running a base worker.")
        make_log_files(config)
    else:
        # Non-chef workers stay quiet so logs are not duplicated per rank.
        logger.warning("Running worker %d and disabling logger", config.rank)
        logger.setLevel(CRITICAL)

    if config.date is None:
        now = datetime.now()
        date = now.strftime("%m.%d")
    else:
        date = config.date
    # Use the pre-offset seed (seed - rank) so all workers share one run name.
    config.run_name = "rl.{}.{}.{}.{}".format(
        config.env, date, config.prefix, config.seed - rank
    )
    if config.group is None:
        config.group = "rl.{}.{}.{}".format(config.env, date, config.prefix)

    config.log_dir = os.path.join(config.log_root_dir, config.run_name)
    if config.is_train:
        config.record_dir = os.path.join(config.log_dir, "video")
    else:
        config.record_dir = os.path.join(config.log_dir, "eval_video")

    def shutdown(signum, frame):
        # Bug fix: the parameter was previously named `signal`, shadowing the
        # `signal` module inside this handler.
        logger.warning("Received signal %s: exiting", signum)
        sys.exit(128 + signum)

    signal.signal(signal.SIGHUP, shutdown)
    signal.signal(signal.SIGINT, shutdown)
    signal.signal(signal.SIGTERM, shutdown)

    # set global seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    # NOTE(review): hard-codes the X display for off-screen rendering —
    # confirm this matches the deployment environment.
    os.environ["DISPLAY"] = ":1"

    if config.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(config.gpu)
        assert torch.cuda.is_available()
        config.device = torch.device("cuda")
    else:
        config.device = torch.device("cpu")

    # build a trainer
    trainer = Trainer(config)
    if config.is_train:
        trainer.train()
        logger.info("Finish training")
    else:
        trainer.evaluate()
        logger.info("Finish evaluating")


def make_log_files(config):
    """Create log/video directories for this run and, when training, record
    the git state, launch command, and full config for reproducibility."""
    if config.date is None:
        now = datetime.now()
        date = now.strftime("%m.%d")
    else:
        date = config.date
    config.run_name = "rl.{}.{}.{}.{}".format(
        config.env, date, config.prefix, config.seed
    )
    if config.group is None:
        config.group = "rl.{}.{}.{}".format(config.env, date, config.prefix)

    config.log_dir = os.path.join(config.log_root_dir, config.run_name)
    logger.info("Create log directory: %s", config.log_dir)
    os.makedirs(config.log_dir, exist_ok=True)

    if config.is_train:
        config.record_dir = os.path.join(config.log_dir, "video")
    else:
        config.record_dir = os.path.join(config.log_dir, "eval_video")
    logger.info("Create video directory: %s", config.record_dir)
    os.makedirs(config.record_dir, exist_ok=True)

    if config.is_train:
        # log git diff and the exact launch command
        cmds = [
            "echo `git rev-parse HEAD` >> {}/git.txt".format(config.log_dir),
            "git diff >> {}/git.txt".format(config.log_dir),
            "echo 'python -m rl.main {}' >> {}/cmd.sh".format(
                " ".join([shlex_quote(arg) for arg in sys.argv[1:]]), config.log_dir
            ),
        ]
        os.system("\n".join(cmds))

        # log config
        param_path = os.path.join(config.log_dir, "params.json")
        logger.info("Store parameters in %s", param_path)
        with open(param_path, "w") as fp:
            json.dump(config.__dict__, fp, indent=4, sort_keys=True)
class PlannerAgent:
    """Thin agent-facing wrapper around SamplingBasedPlanner that reads its
    parameters from the experiment config."""

    def __init__(
        self,
        config,
        ac_space,
        non_limited_idx=None,
        passive_joint_idx=None,
        ignored_contacts=None,
        planner_type=None,
        goal_bias=0.05,
        is_simplified=False,
        simplified_duration=0.1,
        range_=None,
    ):
        """Build the underlying sampling-based planner.

        Bug fix: `passive_joint_idx` and `ignored_contacts` previously
        defaulted to `[]` (mutable default arguments shared across all
        instances); they now default to None.
        """
        if passive_joint_idx is None:
            passive_joint_idx = []
        if ignored_contacts is None:
            ignored_contacts = []

        self._config = config
        self.planner = SamplingBasedPlanner(
            config,
            config._xml_path,
            action_size(ac_space),
            non_limited_idx,
            planner_type=planner_type,
            passive_joint_idx=passive_joint_idx,
            ignored_contacts=ignored_contacts,
            contact_threshold=config.contact_threshold,
            goal_bias=goal_bias,
            is_simplified=is_simplified,
            simplified_duration=simplified_duration,
            range_=range_,
        )

        self._is_simplified = is_simplified
        self._simplified_duration = simplified_duration

    def plan(self, start, goal, timelimit=None, attempts=15):
        """Plan from `start` to `goal`; returns (traj, success, valid, exact).

        On success the first waypoint (the current state) is dropped.
        NOTE(review): `attempts` is accepted but never used — confirm whether
        retrying on failure was intended.
        """
        config = self._config
        if timelimit is None:
            timelimit = config.timelimit
        traj, states, valid, exact = self.planner.plan(start, goal, timelimit)
        success = valid and exact

        if success:
            return traj[1:], success, valid, exact
        else:
            return traj, success, valid, exact

    def get_planner_status(self):
        """Return the planner's status string for the last plan() call."""
        return self.planner.get_planner_status()

    def isValidState(self, state):
        """Check a joint-space state for validity via the planner."""
        return self.planner.isValidState(state)
class AddBias(nn.Module):
    """Adds a learnable, input-independent bias to its input.

    Used by DiagGaussian as a state-independent log-std parameter.
    """

    def __init__(self, bias):
        super().__init__()
        # Stored as (n, 1) so .t() yields a (1, n) row for broadcasting.
        self._bias = nn.Parameter(bias.unsqueeze(1))

    def forward(self, x):
        # Broadcast the bias along the batch (and spatial, for 4-D inputs)
        # dimensions.
        view_shape = (1, -1) if x.dim() == 2 else (1, -1, 1, 1)
        return x + self._bias.t().view(*view_shape)
class GumbelSoftmax(torch.distributions.RelaxedOneHotCategorical):
    """
    A differentiable Categorical distribution using reparametrization trick with Gumbel-Softmax
    Explanation http://amid.fish/assets/gumbel.html
    NOTE: use this in place PyTorch's RelaxedOneHotCategorical distribution since its log_prob is not working right (returns positive values)
    Papers:
    [1] The Concrete Distribution: A Continuous Relaxation of Discrete Random Variables (Maddison et al, 2017)
    [2] Categorical Reparametrization with Gumbel-Softmax (Jang et al, 2017)
    """

    def sample(self, sample_shape=torch.Size()):
        """Gumbel-softmax sampling. Note rsample is inherited from RelaxedOneHotCategorical"""
        # -log(-log(U)) with U ~ Uniform(0,1) is a standard Gumbel sample;
        # argmax over (logits + Gumbel noise) draws an *index* from the
        # underlying categorical (Gumbel-max trick). Non-differentiable.
        u = torch.empty(
            self.logits.size(), device=self.logits.device, dtype=self.logits.dtype
        ).uniform_(0, 1)
        noisy_logits = self.logits - torch.log(-torch.log(u))
        return torch.argmax(noisy_logits, dim=-1)

    def rsample(self, sample_shape=torch.Size()):
        """
        Gumbel-softmax resampling using the Straight-Through trick.
        Credit to Ian Temple for bringing this to our attention. To see standalone code of how this works, refer to https://gist.github.com/yzh119/fd2146d2aeb329d067568a493b20172f
        """
        # Forward pass yields a hard one-hot; the (out - rout).detach() + rout
        # trick routes gradients through the relaxed sample `rout`.
        rout = super().rsample(sample_shape)  # differentiable
        out = F.one_hot(torch.argmax(rout, dim=-1), self.logits.shape[-1]).float()
        return (out - rout).detach() + rout

    def log_prob(self, value):
        """value is one-hot or relaxed"""
        # if self.logits.shape[-1] == 1:
        #     value = torch.zeros_like(value)
        # Integer class indices are converted to one-hot before computing the
        # categorical cross-entropy-style log probability.
        if value.shape != self.logits.shape:
            value = F.one_hot(value.long(), self.logits.shape[-1]).float()
        assert value.shape == self.logits.shape
        return -torch.sum(-value * F.log_softmax(self.logits, -1), -1)  # scaling

    def entropy(self):
        # NOTE(review): reaches into RelaxedOneHotCategorical's private
        # `base_dist._categorical` attribute — may break across torch versions;
        # confirm against the installed torch release.
        return self.base_dist._categorical.entropy()
class MlpActor(Actor):
    """MLP policy: flattens a dict observation and emits, per action
    subspace, a mean head and (for continuous, non-deterministic policies)
    a log-std head."""

    def __init__(
        self,
        config,
        ob_space,
        ac_space,
        tanh_policy,
        deterministic=False,
        activation="relu",
        rl_hid_size=None,
    ):
        """
        Args:
            config: experiment config; this block reads rl_hid_size,
                actor_num_hid_layers, algo, and device from it.
            ob_space: dict-like observation space, flattened to one vector.
            ac_space: gym.spaces.Dict of per-key action subspaces.
            tanh_policy: forwarded to the Actor base class.
            deterministic: if True, no log-std heads are created.
            activation: hidden activation name passed through to MLP.
            rl_hid_size: hidden width; defaults to config.rl_hid_size.
        """
        super().__init__(config, ob_space, ac_space, tanh_policy)

        self._ac_space = ac_space
        self._deterministic = deterministic
        if rl_hid_size is None:  # fix: `is None`, not `== None` (PEP 8)
            rl_hid_size = config.rl_hid_size

        # Shared trunk over the flattened observation.
        input_dim = observation_size(ob_space)

        self.fc = MLP(
            config,
            input_dim,
            rl_hid_size,
            [rl_hid_size] * config.actor_num_hid_layers,
            activation=activation,
        )
        self.fc_means = nn.ModuleDict()
        self.fc_log_stds = nn.ModuleDict()

        # One output head per action subspace.
        for k, space in ac_space.spaces.items():
            if isinstance(space, spaces.Box):
                self.fc_means.update(
                    {
                        k: MLP(
                            config,
                            rl_hid_size,
                            action_size(space),
                            activation=activation,
                        )
                    }
                )
                if not self._deterministic:
                    self.fc_log_stds.update(
                        {
                            k: MLP(
                                config,
                                rl_hid_size,
                                action_size(space),
                                activation=activation,
                            )
                        }
                    )
            elif isinstance(space, spaces.Discrete):
                self.fc_means.update(
                    {k: MLP(config, rl_hid_size, space.n, activation=activation)}
                )
            else:
                # Fallback: `space` is used directly as the output width, so it
                # is presumably already an int here — TODO(review): confirm
                # against callers.
                self.fc_means.update(
                    {k: MLP(config, rl_hid_size, space, activation=activation)}
                )

    def forward(self, ob, deterministic=False):
        """Return (means, stds) OrderedDicts keyed like the action space.

        stds[k] is None for discrete subspaces or deterministic policies.
        """
        inp = list(ob.values())
        if len(inp[0].shape) == 1:
            # Single (unbatched) observation: add a batch dimension.
            inp = [x.unsqueeze(0) for x in inp]

        out = self._activation_fn(self.fc(torch.cat(inp, dim=-1)))
        out = torch.reshape(out, (out.shape[0], -1))

        means, stds = OrderedDict(), OrderedDict()

        for k, space in self._ac_space.spaces.items():
            mean = self.fc_means[k](out)
            if isinstance(space, spaces.Box) and not self._deterministic:
                if self._config.algo == "ppo":
                    # PPO: state-independent log-std (head is fed zeros).
                    zeros = torch.zeros(mean.size()).to(self._config.device)
                    log_std = self.fc_log_stds[k](zeros)
                else:
                    log_std = self.fc_log_stds[k](out)
                log_std = torch.clamp(log_std, -10, 2)
                std = torch.exp(log_std.double())
            else:
                std = None
            means[k] = mean
            stds[k] = std
        return means, stds


class MlpCritic(Critic):
    """MLP value/Q network: flattens ob (and optionally ac) into one vector
    and maps it to a single scalar per batch element."""

    def __init__(
        self, config, ob_space, ac_space=None, activation="relu", rl_hid_size=None
    ):
        super().__init__(config)

        input_dim = observation_size(ob_space)
        if ac_space is not None:
            # Q-function variant: condition on the action as well.
            input_dim += action_size(ac_space)

        if rl_hid_size is None:  # fix: `is None`, not `== None` (PEP 8)
            rl_hid_size = config.rl_hid_size

        self.fc = MLP(config, input_dim, 1, [rl_hid_size] * 2, activation=activation)

    def forward(self, ob, ac=None):
        inp = list(ob.values())
        if len(inp[0].shape) == 1:
            inp = [x.unsqueeze(0) for x in inp]
        if ac is not None:
            ac = list(ac.values())
            if len(ac[0].shape) == 1:
                ac = [x.unsqueeze(0) for x in ac]
            inp.extend(ac)

        out = self.fc(torch.cat(inp, dim=-1))
        out = torch.reshape(out, (out.shape[0], 1))

        return out


# Necessary for my KFAC implementation.
class AddBias(nn.Module):
    """Learnable additive bias kept as its own module (so KFAC can treat it
    as a separate layer)."""

    def __init__(self, bias):
        super(AddBias, self).__init__()
        # Stored as (n, 1) so it can be viewed for both 2-D and 4-D inputs.
        self._bias = nn.Parameter(bias.unsqueeze(1))

    def forward(self, x):
        if x.dim() == 2:
            bias = self._bias.t().view(1, -1)
        else:
            # Conv-style input: broadcast over the spatial dimensions.
            bias = self._bias.t().view(1, -1, 1, 1)

        return x + bias


# ---------------------------------------------------------------------------
# rl/policies/utils.py
# ---------------------------------------------------------------------------
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


class CNN(nn.Module):
    """Conv stack configured by config.kernel_size/stride/conv_dim; the
    forward pass flattens the final feature map."""

    def __init__(self, config, input_dim):
        super().__init__()
        self.activation_fn = nn.ReLU()

        self.convs = nn.ModuleList()
        w = config.img_width
        init_ = lambda m: init(
            m,
            nn.init.orthogonal_,
            lambda x: nn.init.constant_(x, 0),
            nn.init.calculate_gain("relu"),
        )

        for k, s, d in zip(config.kernel_size, config.stride, config.conv_dim):
            self.convs.append(init_(nn.Conv2d(input_dim, d, int(k), int(s))))
            # Output width for a conv with no padding and dilation 1.
            w = int(np.floor((w - (int(k) - 1) - 1) / int(s) + 1))
            input_dim = d

        # screen_width == 32 (8,4)-(3,2) -> 3x3
        # screen_width == 64 (8,4)-(3,2)-(3,2) -> 3x3
        # screen_width == 128 (8,4)-(3,2)-(3,2)-(3,2) -> 3x3
        # screen_width == 256 (8,4)-(3,2)-(3,2)-(3,2) -> 7x7

        print("Output of CNN = %d x %d x %d" % (w, w, d))
        self.w = w
        self.output_size = w * w * d

    def forward(self, ob):
        out = ob
        for conv in self.convs:
            out = self.activation_fn(conv(out))
        out = out.flatten(start_dim=1)
        return out


def fanin_init(tensor):
    """In-place uniform init with bound 1/sqrt(fan_in) (DDPG-style)."""
    size = tensor.size()
    if len(size) == 2:
        fan_in = size[0]
    elif len(size) > 2:
        fan_in = np.prod(size[1:])
    else:
        raise Exception("Shape must be have dimension at least 2.")
    bound = 1.0 / np.sqrt(fan_in)
    return tensor.data.uniform_(-bound, bound)


class MLP(nn.Module):
    """Fully-connected stack: `hid_dims` hidden layers (fanin-initialized,
    bias 0.1) followed by a small-uniform-initialized output layer.

    Note: `config` is accepted for interface compatibility but unused.
    """

    def __init__(
        self,
        config,
        input_dim,
        output_dim,
        hid_dims=(),  # immutable default instead of mutable `[]`
        last_activation=False,
        activation="relu",
    ):
        super().__init__()
        if activation == "relu":
            activation_fn = nn.ReLU()
        elif activation == "tanh":
            activation_fn = nn.Tanh()
        elif activation == "elu":
            # FIX: original tested the misspelled name `acitvation` (NameError
            # whenever this branch was reached) and built `nn.Elu()`, which
            # does not exist — the module is nn.ELU.
            activation_fn = nn.ELU()
        else:
            # FIX: original `return`ed NotImplementedError instead of raising.
            raise NotImplementedError("unsupported activation: %s" % activation)

        # (Removed an `init_` lambda the original defined here but never used.)
        fc = []
        prev_dim = input_dim
        for d in hid_dims:
            fc.append(nn.Linear(prev_dim, d))
            fanin_init(fc[-1].weight)
            fc[-1].bias.data.fill_(0.1)
            fc.append(activation_fn)
            prev_dim = d
        fc.append(nn.Linear(prev_dim, output_dim))
        # Small output init keeps initial actions/values near zero.
        fc[-1].weight.data.uniform_(-1e-3, 1e-3)
        fc[-1].bias.data.uniform_(-1e-3, 1e-3)
        if last_activation:
            fc.append(activation_fn)
        self.fc = nn.Sequential(*fc)

    def forward(self, ob):
        return self.fc(ob)


def init(module, weight_init, bias_init, gain=1):
    """Apply weight_init/bias_init to a module's parameters and return it."""
    weight_init(module.weight.data, gain=gain)
    bias_init(module.bias.data)
    return module


# ---------------------------------------------------------------------------
# scripts/2d/baseline.sh — variable definitions (its invocation continues in
# the next chunk of this dump); preserved here as comments so this chunk
# remains valid Python:
# #!/bin/bash -x
# gpu=$1
# seed=$2
# algo='sac'
# prefix="BASELINE.SAC"
# env="PusherObstacle-v0"
# max_episode_step="400"
# debug="True"
# log_root_dir="./logs"
# reward_scale='10.'
# vis_replay="True"
# success_reward='150.'
# --- scripts/2d/baseline.sh (continuation: invocation; the variables are
# --- defined in the previous chunk of this dump) ---
python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_scale $reward_scale \
    --vis_replay $vis_replay \
    --success_reward $success_reward

# --- scripts/2d/baseline_ik.sh ---
#!/bin/bash -x
gpu=$1
seed=$2
algo='sac'
prefix="BASELINE.SAC.IK"
env="PusherObstacle-v0"
max_episode_step="400"
debug="False"
log_root_dir="./logs"
reward_scale='10.'
vis_replay="True"
success_reward='150.'
use_ik_target="True"
action_range="0.01"

# FIX: removed `--ckpt_interval $ckpt_interval` — ckpt_interval was never
# assigned in this script, so the empty expansion left a bare
# `--ckpt_interval` flag and argparse aborted with "expected one argument".
# (Also dropped the redundant self-assignment `gpu=$gpu`.)
python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_scale $reward_scale \
    --vis_replay $vis_replay \
    --success_reward $success_reward \
    --use_ik_target $use_ik_target \
    --action_range $action_range

# --- scripts/2d/baseline_lg.sh ---
#!/bin/bash -x
gpu=$1
seed=$2
algo='sac'
prefix="BASELINE.SAC.LG"
env="PusherObstacle-v0"
max_episode_step="400"
debug="False"
log_root_dir="./logs"
reward_scale='10.'
vis_replay="True"
success_reward='150.'
expand_ac_space="True"
action_range='1.0'
use_smdp_update="True"

python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_scale $reward_scale \
    --vis_replay $vis_replay \
    --success_reward $success_reward \
    --expand_ac_space $expand_ac_space \
    --action_range $action_range \
    --use_smdp_update $use_smdp_update

# --- scripts/2d/mopa.sh ---
#!/bin/bash -x
gpu=$1
seed=$2

algo='sac'
prefix="MoPA-SAC"
env="PusherObstacle-v0"
max_episode_step="400"
debug="False"
log_root_dir="./logs"
mopa="True"
reward_scale="0.2"
reuse_data="True"
action_range="1.0"
omega='0.5'
use_smdp_update="True"
stochastic_eval="True"
invalid_target_handling="True"
max_reuse_data='30'
ac_space_type="piecewise"
success_reward="150.0"


python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --mopa $mopa \
    --reward_scale $reward_scale \
    --reuse_data $reuse_data \
    --action_range $action_range \
    --omega $omega \
    --success_reward $success_reward \
    --stochastic_eval $stochastic_eval \
    --invalid_target_handling $invalid_target_handling \
    --max_reuse_data $max_reuse_data \
    --ac_space_type $ac_space_type \
    --use_smdp_update $use_smdp_update

# --- scripts/2d/mopa_discrete.sh (contents in the next chunk) ---
# --- scripts/2d/mopa_discrete.sh ---
#!/bin/bash -x
# FIX: shebang was the malformed "#/!/bin/bash -x" — just a comment to the
# kernel, so the script ran under the caller's default shell without -x.
gpu=$1
seed=$2
algo='sac'
prefix="MoPA-SAC.discrete"
env="PusherObstacle-v0"
max_episode_step="400"
debug="False"
reward_type='sparse'
log_root_dir="./logs"
mopa="True"
reward_scale="0.2"
reuse_data="True"
action_range="1.0"
stochastic_eval="True"
invalid_target_handling="True"
max_reuse_data='30'
use_smdp_update="True"
ac_space_type="normal"
success_reward="150.0"
discrete_action="True"


# FIX: removed `--comment $comment` — $comment is never defined in this
# script, so the empty expansion left a bare `--comment` flag and argparse
# aborted with "expected one argument".
#mpiexec -n $workers
python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_type $reward_type \
    --mopa $mopa \
    --reward_scale $reward_scale \
    --reuse_data $reuse_data \
    --action_range $action_range \
    --discrete_action $discrete_action \
    --success_reward $success_reward \
    --stochastic_eval $stochastic_eval \
    --invalid_target_handling $invalid_target_handling \
    --max_reuse_data $max_reuse_data \
    --use_smdp_update $use_smdp_update \
    --ac_space_type $ac_space_type

# --- scripts/2d/mopa_ik.sh (continues in the next chunk) ---
#!/bin/bash -x
# FIX: same malformed "#/!/bin/bash -x" shebang corrected here.
gpu=$1
seed=$2

algo='sac'
prefix="MoPA-SAC.IK"
env="PusherObstacle-v0"
max_episode_step="400"
debug="False"
reward_type='sparse'
log_root_dir="./logs"
mopa="True"
reward_scale="0.2"
reuse_data="True"
action_range="0.1"
stochastic_eval="True"
invalid_target_handling="True"
max_reuse_data='30'
use_smdp_update="True"
success_reward="150.0"
use_ik_target="True"
# --- scripts/2d/mopa_ik.sh (continuation; the variables above were set in
# --- the previous chunk of this dump) ---
ik_target="fingertip"
omega='0.1'

python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_type $reward_type \
    --mopa $mopa \
    --reward_scale $reward_scale \
    --reuse_data $reuse_data \
    --action_range $action_range \
    --success_reward $success_reward \
    --stochastic_eval $stochastic_eval \
    --invalid_target_handling $invalid_target_handling \
    --max_reuse_data $max_reuse_data \
    --use_smdp_update $use_smdp_update \
    --use_ik_target $use_ik_target \
    --ik_target $ik_target \
    --omega $omega

# --- scripts/3d/assembly/baseline.sh ---
#!/bin/bash -x
gpu=$1
seed=$2
prefix="BASELINE"
env="SawyerAssemblyObstacle-v0"
algo='sac'
max_episode_step="250"
debug="False"
reward_type='sparse'
log_root_dir="./logs"
vis_replay="True"
plot_type='3d'
success_reward='150.'
reward_scale="10."

python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_type $reward_type \
    --vis_replay $vis_replay \
    --plot_type $plot_type \
    --success_reward $success_reward \
    --reward_scale $reward_scale

# --- scripts/3d/assembly/baseline_ik.sh ---
#!/bin/bash -x
gpu=$1
seed=$2
prefix="BASELINE.IK"
env="SawyerAssemblyObstacle-v0"
algo='sac'
max_episode_step="250"
debug="False"
reward_type='sparse'
log_root_dir="./logs"
vis_replay="True"
plot_type='3d'
success_reward='150.'
reward_scale="10."
use_ik_target="True"
ik_target="grip_site"
action_range="0.001"

python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_type $reward_type \
    --vis_replay $vis_replay \
    --plot_type $plot_type \
    --success_reward $success_reward \
    --reward_scale $reward_scale \
    --use_ik_target $use_ik_target \
    --ik_target $ik_target \
    --action_range $action_range

# --- scripts/3d/assembly/baseline_lg.sh (continues in the next chunk) ---
#!/bin/bash -x
gpu=$1
seed=$2
prefix="BASELINE.SAC.LG"
env="SawyerAssemblyObstacle-v0"
data='08.09'
algo='sac'
max_episode_step="250"
debug="False"
reward_type='sparse'
log_root_dir="./logs"
vis_replay="True"
plot_type='3d'
# --- scripts/3d/assembly/baseline_lg.sh (continuation) — preserved as
# comments so this chunk stays valid Python alongside the quaternion
# utilities below:
#   success_reward='150.'
#   reward_scale="10."
#   action_range="0.5"
#   expand_ac_space='True'
#   use_smdp_update="True"
#
#   python -m rl.main \
#       --log_root_dir $log_root_dir \
#       --prefix $prefix \
#       --env $env \
#       --gpu $gpu \
#       --max_episode_step $max_episode_step \
#       --debug $debug \
#       --algo $algo \
#       --seed $seed \
#       --reward_type $reward_type \
#       --vis_replay $vis_replay \
#       --plot_type $plot_type \
#       --success_reward $success_reward \
#       --reward_scale $reward_scale \
#       --action_range $action_range \
#       --expand_ac_space $expand_ac_space \
#       --use_smdp_update $use_smdp_update

# NOTE(review): the dump jumps here from scripts/3d/assembly/mopa.sh into the
# middle of a quaternion-utility module. The visible tail of quat2axisangle
# (whose `def` line is NOT in this chunk, so it is not reconstructed) read:
#       theta = 2 * np.arcsin(sin_theta)
#       theta *= 1 if quat[0] >= 0 else -1
#       axis = quat[1:] / sin_theta
#       return axis, theta


def quat_to_zangle(quat):
    """Recover the z-angle encoded by zangle_to_quat (its inverse)."""
    reference = quat_create(np.array([0, 1.0, 0]), np.pi / 2)
    relative = quat_mul(quat_inv(reference), quat)
    _, angle = quat2axisangle(relative)
    return angle


def zangle_to_quat(zangle):
    """
    :param zangle in rad
    :return: quaternion
    """
    pitch = quat_create(np.array([0, 1.0, 0]), np.pi / 2)
    roll = quat_create(np.array([-1.0, 0, 0]), zangle)
    return quat_mul(pitch, roll)


def quat_create(axis, angle):
    """
    Create a quaternion from an axis and angle.
    :param axis The three dimensional axis
    :param angle The angle in radians
    :return: A 4-d array containing the components of a quaternion.
    """
    result = np.zeros([4], dtype="float")
    mujoco_py.functions.mju_axisAngle2Quat(result, axis, angle)
    return result


def quat_inv(quat):
    """
    Invert a quaternion, represented by a 4d array.
    :param A quaternion (4-d array). Must not be the zero quaternion
        (all elements equal to zero).
    :return: A 4-d array containing the components of a quaternion.
    """
    # Inverse = conjugate divided by the squared norm.
    inv_norm_sq = 1.0 / np.sum(quat ** 2)
    return inv_norm_sq * np.array([1.0, -1.0, -1.0, -1.0]) * quat


def quat_mul(quat1, quat2):
    """
    Multiply two quaternions, both represented as 4-d arrays.
    """
    product = np.zeros([4], dtype="float")
    mujoco_py.functions.mju_mulQuat(product, quat1, quat2)
    return product