├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── config ├── __init__.py ├── motion_planner.py ├── pusher.py └── sawyer.py ├── docs ├── content.md ├── img │ ├── 2D_push.png │ ├── clvrbanner.png │ ├── favicon-32x32.png │ ├── method.png │ ├── result.png │ ├── sawyer_assembly.png │ ├── sawyer_lift.png │ ├── sawyer_push.png │ └── teaser.png ├── index.html ├── js │ └── figure-extension.js ├── theme.css └── video │ ├── push_baseline.mp4 │ ├── sawyer_assembly_baseline.mp4 │ ├── sawyer_assembly_baseline_lg.mp4 │ ├── sawyer_assembly_mopa.mp4 │ ├── sawyer_lift_baseline.mp4 │ ├── sawyer_lift_baseline_lg.mp4 │ ├── sawyer_lift_mopa.gif │ ├── sawyer_lift_mopa.mp4 │ ├── sawyer_push_baseline.mp4 │ ├── sawyer_push_baseline_lg.mp4 │ ├── sawyer_push_mopa.gif │ ├── sawyer_push_mopa.mp4 │ └── teaser.gif ├── env ├── __init__.py ├── assets │ ├── meshes │ │ ├── sawyer │ │ │ ├── base.stl │ │ │ ├── head.stl │ │ │ ├── l0.stl │ │ │ ├── l1.stl │ │ │ ├── l2.stl │ │ │ ├── l3.stl │ │ │ ├── l4.stl │ │ │ ├── l5.stl │ │ │ ├── l6.stl │ │ │ └── pedestal.stl │ │ ├── toy_table │ │ │ ├── 0.stl │ │ │ ├── 1.stl │ │ │ ├── 2.stl │ │ │ ├── 3.stl │ │ │ └── 4.stl │ │ └── two_finger_gripper │ │ │ ├── electric_gripper_base.STL │ │ │ ├── half_round_tip.STL │ │ │ └── standard_narrow.STL │ ├── objects │ │ ├── can-visual.xml │ │ ├── can.xml │ │ └── meshes │ │ │ └── can.stl │ ├── textures │ │ ├── can.png │ │ ├── dark-wood.png │ │ ├── grid.png │ │ └── light-wood.png │ └── xml │ │ ├── common │ │ ├── basic_scene.xml │ │ ├── camera.xml │ │ ├── gripper_assembly_chain.xml │ │ ├── gripper_chain.xml │ │ ├── gripper_indicator_chain.xml │ │ ├── gripper_pick_chain.xml │ │ ├── gripper_pick_indicator_chain.xml │ │ ├── gripper_pick_pos_act.xml │ │ ├── gripper_pick_target_chain.xml │ │ ├── gripper_pos_act.xml │ │ ├── gripper_target_chain.xml │ │ ├── materials.xml │ │ ├── pusher.xml │ │ ├── pusher_gripper.xml │ │ ├── sawyer.xml │ │ ├── sawyer_assembly.xml │ │ ├── sawyer_assembly_chain.xml │ │ ├── sawyer_chain.xml │ │ ├── 
sawyer_dependencies.xml │ │ ├── sawyer_indicator_chain.xml │ │ ├── sawyer_joint_pos_act.xml │ │ ├── sawyer_no_gripper.xml │ │ ├── sawyer_no_gripper_chain.xml │ │ ├── sawyer_no_gripper_indicator_chain.xml │ │ ├── sawyer_no_gripper_target_chain.xml │ │ ├── sawyer_pick.xml │ │ ├── sawyer_pick_chain.xml │ │ ├── sawyer_pick_indicator_chain.xml │ │ ├── sawyer_pick_target_chain.xml │ │ ├── sawyer_target_chain.xml │ │ ├── skybox.xml │ │ ├── target.xml │ │ └── visual.xml │ │ ├── pusher_obstacle.xml │ │ ├── sawyer.xml │ │ ├── sawyer_assembly.xml │ │ ├── sawyer_assembly_obstacle.xml │ │ ├── sawyer_lift.xml │ │ ├── sawyer_lift_obstacle.xml │ │ ├── sawyer_push.xml │ │ └── sawyer_push_obstacle.xml ├── base.py ├── inverse_kinematics.py ├── pusher │ ├── __init__.py │ └── pusher_obstacle.py └── sawyer │ ├── __init__.py │ ├── sawyer.py │ ├── sawyer_assembly.py │ ├── sawyer_assembly_obstacle.py │ ├── sawyer_lift.py │ ├── sawyer_lift_obstacle.py │ ├── sawyer_push.py │ └── sawyer_push_obstacle.py ├── motion_planners ├── 3rd_party │ └── include │ │ └── cxxopts.hpp ├── KinematicPlanner.cpp ├── __init__.py ├── include │ ├── KinematicPlanner.h │ ├── c_planner.pxd.bak │ ├── compound_state_projector.h │ ├── mujoco_ompl_interface.h │ ├── mujoco_wrapper.h │ └── plan.pxd.bak ├── planner.cpp ├── planner.pyx ├── sampling_based_planner.py ├── setup.py ├── setup_macos.py └── src │ ├── compound_state_projector.cpp │ ├── mujoco_ompl_interface.cpp │ └── mujoco_wrapper.cpp ├── requirements.txt ├── rl ├── __init__.py ├── base_agent.py ├── dataset.py ├── main.py ├── mopa_rollouts.py ├── planner_agent.py ├── policies │ ├── __init__.py │ ├── actor_critic.py │ ├── distributions.py │ ├── mlp_actor_critic.py │ └── utils.py ├── rollouts.py ├── sac_agent.py ├── td3_agent.py └── trainer.py ├── scripts ├── 2d │ ├── baseline.sh │ ├── baseline_ik.sh │ ├── baseline_lg.sh │ ├── mopa.sh │ ├── mopa_discrete.sh │ └── mopa_ik.sh ├── 3d │ ├── assembly │ │ ├── baseline.sh │ │ ├── baseline_ik.sh │ │ ├── baseline_lg.sh │ │ 
├── mopa.sh │ │ ├── mopa_discrete.sh │ │ └── mopa_ik.sh │ ├── lift │ │ ├── baseline.sh │ │ ├── baseline_ik.sh │ │ ├── baseline_lg.sh │ │ ├── mopa.sh │ │ ├── mopa_discrete.sh │ │ └── mopa_ik.sh │ └── push │ │ ├── baseline.sh │ │ ├── baseline_ik.sh │ │ ├── baseline_lg.sh │ │ ├── mopa.sh │ │ ├── mopa_discrete.sh │ │ └── mopa_ik.sh └── misc │ ├── evaluate_safety.sh │ └── installEigen.sh └── util ├── __init__.py ├── contact_info.py ├── env.py ├── gym.py ├── info.py ├── logger.py ├── misc.py ├── mpi.py ├── pytorch.py ├── sawyer_env.py └── transform_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Mac OS 2 | .DS_Store 3 | *~ 4 | .python-version 5 | 6 | # Log 7 | log 8 | logs 9 | 10 | # Vim 11 | .*.s[a-w][a-z] 12 | Session.vim 13 | 14 | # Data 15 | *.csv 16 | *.ini 17 | *.npy 18 | *.zip 19 | *screenlog* 20 | 21 | # Mujoco 22 | MUJOCO_LOG.TXT 23 | 24 | # Python 25 | ## Byte-compiled / optimized / DLL files 26 | __pycache__/ 27 | *.py[cod] 28 | *$py.class 29 | 30 | ## C extensions 31 | #*.so 32 | 33 | ## Distribution / packaging 34 | .Python 35 | build/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | 40 | ## Jupyter Notebook 41 | .ipynb_checkpoints 42 | **/*.ipynb 43 | 44 | ## virtualenv 45 | .venv 46 | venv/ 47 | 48 | ## Rope project settings 49 | .ropeproject 50 | 51 | ## Wandb 52 | wandb/ 53 | 54 | ## PyCharm 55 | .idea/ 56 | 57 | ## VSCode 58 | .vscode/ 59 | 60 | ## flake8 61 | .flake8 62 | z.vid 63 | z.log.global 64 | test_logs 65 | tmp 66 | 67 | ## Eigen 68 | eigen-3.3.7/ 69 | 70 | motion_planners/*.so 71 | ./result 72 | ./plot 73 | # env/assets/xml/sawyer* 74 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Cognitive Learning for Vision and Robotics (CLVR) lab @ USC 4 | 5 | Permission is hereby granted, free of charge, to any person 
obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /config/motion_planner.py: -------------------------------------------------------------------------------- 1 | from util import str2bool 2 | 3 | 4 | def add_arguments(parser): 5 | """ 6 | Adds a list of arguments to argparser for the reacher environment. 
7 | """ 8 | # reacher 9 | parser.add_argument( 10 | "--planner_type", 11 | type=str, 12 | default="rrt_connect", 13 | choices=[ 14 | "rrt", 15 | "rrt_connect", 16 | ], 17 | help="planner type", 18 | ) 19 | parser.add_argument( 20 | "--simple_planner_type", 21 | type=str, 22 | default="rrt_connect", 23 | choices=[ 24 | "rrt", 25 | "rrt_connect", 26 | ], 27 | help="planner type for simple planner", 28 | ) 29 | parser.add_argument( 30 | "--planner_objective", 31 | type=str, 32 | default="path_length", 33 | choices=[ 34 | "maximize_min_clearance", 35 | "path_length", 36 | "state_cost_integral", 37 | "constraint", 38 | ], 39 | help="planner objective function", 40 | ) 41 | parser.add_argument( 42 | "--threshold", 43 | type=float, 44 | default=0.0, 45 | help="threshold for optimization objective", 46 | ) 47 | parser.add_argument( 48 | "--is_simplified", 49 | type=str2bool, 50 | default=False, 51 | help="enable simplification of planned trajectory", 52 | ) 53 | parser.add_argument( 54 | "--simplified_duration", 55 | type=float, 56 | default=0.01, 57 | help="duration of simplification of planned trajectory", 58 | ) 59 | parser.add_argument( 60 | "--simple_planner_simplified", 61 | type=str2bool, 62 | default=False, 63 | help="enable simplification of planned trajectory for simple planner", 64 | ) 65 | parser.add_argument( 66 | "--simple_planner_simplified_duration", 67 | type=float, 68 | default=0.01, 69 | help="duration of simplification of planned trajectory for simple planner", 70 | ) 71 | 72 | 73 | def get_default_config(): 74 | """ 75 | Gets default configurations for the reacher environment. 
76 | """ 77 | import argparse 78 | 79 | parser = argparse.ArgumentParser("Default Configuration for Motion Planner") 80 | add_argument(parser) 81 | 82 | config = parser.parse_args([]) 83 | return config 84 | -------------------------------------------------------------------------------- /config/pusher.py: -------------------------------------------------------------------------------- 1 | from util import str2bool 2 | 3 | 4 | def add_arguments(parser): 5 | """ 6 | Adds a list of arguments to argparser for the pusher environment. 7 | """ 8 | # pusher 9 | parser.add_argument( 10 | "--reward_type", 11 | type=str, 12 | default="dense", 13 | choices=["sparse"], 14 | help="reward type", 15 | ) 16 | parser.add_argument( 17 | "--distance_threshold", 18 | type=float, 19 | default=0.05, 20 | help="distance threshold for termination", 21 | ) 22 | parser.add_argument( 23 | "--max_episode_steps", 24 | type=int, 25 | default=150, 26 | help="maximum timesteps in an episode", 27 | ) 28 | 29 | # observations 30 | parser.add_argument( 31 | "--screen_width", type=int, default=500, help="width of camera image" 32 | ) 33 | parser.add_argument( 34 | "--screen_height", type=int, default=500, help="height of camera image" 35 | ) 36 | parser.add_argument( 37 | "--frame_skip", type=int, default=1, help="Numer of skip frames" 38 | ) 39 | parser.add_argument( 40 | "--kp", type=float, default=150.0, help="p term for a PID controller" 41 | ) 42 | parser.add_argument( 43 | "--kd", type=float, default=20.0, help="d term for a PID controller" 44 | ) 45 | parser.add_argument( 46 | "--ki", type=float, default=0.1, help="i term for a PID controller" 47 | ) 48 | parser.add_argument( 49 | "--frame_dt", type=float, default=1.0, help="delta t between each frame" 50 | ) 51 | parser.add_argument( 52 | "--ctrl_reward_coef", type=float, default=0, help="control reward coefficient" 53 | ) 54 | parser.add_argument( 55 | "--success_reward", type=float, default=150.0, help="completion reward" 56 | ) 57 | 
parser.add_argument( 58 | "--camera_name", type=str, default="cam0", help="camera name in an environment" 59 | ) 60 | parser.add_argument( 61 | "--range", type=float, default=0.2, help="range of motion planner" 62 | ) 63 | parser.add_argument( 64 | "--simple_planner_range", 65 | type=float, 66 | default=0.1, 67 | help="range of simple motion planner", 68 | ) 69 | parser.add_argument( 70 | "--timelimit", type=float, default=1.0, help="timelimit for planning" 71 | ) 72 | parser.add_argument( 73 | "--simple_planner_timelimit", 74 | type=float, 75 | default=0.02, 76 | help="timelimit for planning by simple motion planner", 77 | ) 78 | parser.add_argument( 79 | "--contact_threshold", 80 | type=float, 81 | default=-0.0015, 82 | help="depth threshold for contact", 83 | ) 84 | parser.add_argument( 85 | "--joint_margin", type=float, default=0.0, help="margin of each joint" 86 | ) 87 | parser.add_argument( 88 | "--step_size", 89 | type=float, 90 | default=0.04, 91 | help="step size for invalid target handling", 92 | ) 93 | 94 | 95 | def get_default_config(): 96 | """ 97 | Gets default configurations for the pusher environment. 98 | """ 99 | import argparse 100 | 101 | parser = argparse.ArgumentParser("Default Configuration for 2D Pusher Environment") 102 | add_argument(parser) 103 | 104 | parser.add_argument("--seed", type=int, default=1234, help="random seed") 105 | parser.add_argument("--debug", type=str2bool, default=False, help="enable debugging") 106 | 107 | config = parser.parse_args([]) 108 | return config 109 | -------------------------------------------------------------------------------- /config/sawyer.py: -------------------------------------------------------------------------------- 1 | from util import str2bool 2 | 3 | 4 | def add_arguments(parser): 5 | """ 6 | Adds a list of arguments to argparser for the sawyer environment. 
7 | """ 8 | # sawyer 9 | parser.add_argument( 10 | "--reward_type", 11 | type=str, 12 | default="dense", 13 | choices=["dense", "sparse"], 14 | help="reward type", 15 | ) 16 | parser.add_argument( 17 | "--distance_threshold", 18 | type=float, 19 | default=0.06, 20 | help="distance threshold for termination", 21 | ) 22 | parser.add_argument( 23 | "--max_episode_steps", 24 | type=int, 25 | default=250, 26 | help="maximum timesteps in an episode", 27 | ) 28 | parser.add_argument( 29 | "--screen_width", type=int, default=500, help="width of camera image" 30 | ) 31 | parser.add_argument( 32 | "--screen_height", type=int, default=500, help="height of camera image" 33 | ) 34 | parser.add_argument( 35 | "--camera_name", 36 | type=str, 37 | default="visview", 38 | help="camera name in an environment", 39 | ) 40 | 41 | # observations 42 | parser.add_argument( 43 | "--frame_skip", type=int, default=1, help="Numer of skip frames" 44 | ) 45 | parser.add_argument( 46 | "--action_repeat", type=int, default=5, help="number of action repeats" 47 | ) 48 | parser.add_argument( 49 | "--ctrl_reward_coef", type=float, default=0, help="control reward coefficient" 50 | ) 51 | 52 | parser.add_argument( 53 | "--kp", type=float, default=40.0, help="p term for a PID controller" 54 | ) # 150.) 55 | parser.add_argument( 56 | "--kd", type=float, default=8.0, help="d term for a PID controller" 57 | ) # 20.) 
58 | parser.add_argument( 59 | "--ki", type=float, default=0.0, help="i term for a PID controller" 60 | ) 61 | parser.add_argument( 62 | "--frame_dt", type=float, default=0.15, help="delta t between each frame" 63 | ) # 0.1) 64 | parser.add_argument( 65 | "--use_robot_indicator", 66 | type=str2bool, 67 | default=True, 68 | help="enable visualization of robot indicator for motion planner", 69 | ) 70 | parser.add_argument( 71 | "--use_target_robot_indicator", 72 | type=str2bool, 73 | default=True, 74 | help="enable visualization of robot indicator for target position of motion planner", 75 | ) 76 | parser.add_argument( 77 | "--success_reward", type=float, default=150.0, help="completion reward" 78 | ) 79 | parser.add_argument( 80 | "--range", type=float, default=0.1, help="range of motion planner" 81 | ) 82 | parser.add_argument( 83 | "--simple_planner_range", 84 | type=float, 85 | default=0.05, 86 | help="range of simple motion planner", 87 | ) 88 | parser.add_argument( 89 | "--timelimit", type=float, default=1.0, help="timelimit for motion planner" 90 | ) 91 | parser.add_argument( 92 | "--simple_planner_timelimit", 93 | type=float, 94 | default=0.05, 95 | help="timelimit for simple motion planner", 96 | ) 97 | parser.add_argument( 98 | "--contact_threshold", 99 | type=float, 100 | default=-0.002, 101 | help="depth thredhold for contact", 102 | ) 103 | parser.add_argument( 104 | "--joint_margin", type=float, default=0.001, help="marin of each joint" 105 | ) 106 | parser.add_argument( 107 | "--step_size", 108 | type=float, 109 | default=0.02, 110 | help="step size for invalid target handling", 111 | ) 112 | 113 | 114 | def get_default_config(): 115 | """ 116 | Gets default configurations for the Sawyer environment. 
117 | """ 118 | import argparse 119 | 120 | parser = argparse.ArgumentParser("Default Configuration for Sawyer Environment") 121 | add_argument(parser) 122 | 123 | parser.add_argument("--seed", type=int, default=1234, help="random seed") 124 | parser.add_argument("--debug", type=str2bool, default=False, help="enable debugging") 125 | 126 | config = parser.parse_args([]) 127 | return config 128 | -------------------------------------------------------------------------------- /docs/content.md: -------------------------------------------------------------------------------- 1 | 2 | ---- 3 | 4 | ## Overview 5 | 6 | ![MoPA-RL Framework](./img/method.png "") 7 | 8 | To solve tasks in obstructed environments, we propose motion planner augmented reinforcement learning (MoPA-RL). Our framework consists of an RL policy and a motion planner. The motion planner is integrated into the RL policy by enlarging the action space. If a sampled action from the RL policy is in the original action space, an agent directly executes the action to the environment, otherwise the motion planner computes a path to move the agent to faraway points. MoPA-RL has three benefits: 9 | 14 | 15 | ---- 16 | 17 | ## Videos 18 | 19 | Sawyer Push 20 |

Sawyer arm is required to find a path to reach an object inside of a box, and push it to a goal position.


21 |
22 |
23 | 26 |
SAC
27 |
28 |
29 | 32 |
SAC Large
33 |
34 |
35 | 38 |
MoPA-SAC (Ours)
39 |
40 |
41 | Sawyer Lift 42 |

Sawyer arm needs to find a path to get inside a box, grasp a can and take it out from the box.


43 |
44 | 45 | 46 |
47 | 50 |
SAC
51 |
52 |
53 | 56 |
SAC Large
57 |
58 |
59 | 62 |
MoPA-SAC (Ours)
63 |
64 | 65 | 66 |
67 | Sawyer Assembly 68 |

Sawyer arm with an attached table leg needs to avoid other legs to reach a hole of the table, and insert the pole to assemble the table.


69 |
70 |
71 | 74 |
SAC
75 |
76 |
77 | 80 |
SAC Large
81 |
82 |
83 | 86 |
MoPA-SAC (Ours)
87 |
88 |
89 | 90 | ---- 91 | 92 | ## Quantitative results 93 | 94 | 95 | 96 |
97 |
98 |
99 |

Success rates of our MoPA-SAC (green) and several baselines averaged over 4 seeds. Our approach can leverage the motion planner to converge with fewer environment steps than thebaseline. Both SAC and ours are trained for the same number of environment steps.

100 | 101 |
102 |
103 |
104 | 105 | ---- 106 | 107 | ## Citation 108 | ``` 109 | @inproceedings{yamada2020mopa, 110 | title={Motion Planner Augmented Reinforcement Learning for Obstructed Environments}, 111 | author={Jun Yamada and Youngwoon Lee and Gautam Salhotra and Karl Pertsch and Max Pflueger and Gaurav S. Sukhatme and Joseph J. Lim and Peter Englert}, 112 | booktitle={Conference on Robot Learning}, 113 | year={2020}, 114 | } 115 | ``` 116 | -------------------------------------------------------------------------------- /docs/img/2D_push.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/2D_push.png -------------------------------------------------------------------------------- /docs/img/clvrbanner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/clvrbanner.png -------------------------------------------------------------------------------- /docs/img/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/favicon-32x32.png -------------------------------------------------------------------------------- /docs/img/method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/method.png -------------------------------------------------------------------------------- /docs/img/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/result.png 
-------------------------------------------------------------------------------- /docs/img/sawyer_assembly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/sawyer_assembly.png -------------------------------------------------------------------------------- /docs/img/sawyer_lift.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/sawyer_lift.png -------------------------------------------------------------------------------- /docs/img/sawyer_push.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/sawyer_push.png -------------------------------------------------------------------------------- /docs/img/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/teaser.png -------------------------------------------------------------------------------- /docs/js/figure-extension.js: -------------------------------------------------------------------------------- 1 | (function (extension) { 2 | if (typeof showdown !== 'undefined') { 3 | // global (browser or nodejs global) 4 | extension(showdown); 5 | } else if (typeof define === 'function' && define.amd) { 6 | // AMD 7 | define(['showdown'], extension); 8 | } else if (typeof exports === 'object') { 9 | // Node, CommonJS-like 10 | module.exports = extension(require('showdown')); 11 | } else { 12 | // showdown was not found so we throw 13 | throw Error('Could not find showdown library'); 14 | } 15 | }(function (showdown) { 16 | 17 | var fig = '
' + '%2' + '
%3
' + '
'; 18 | var imgRegex = /(?:

)?(?:<\/p>)?/gi; 19 | 20 | // loading extension into shodown 21 | showdown.extension('figure', function () { 22 | return [ 23 | { 24 | type: 'output', 25 | filter: function (text, converter, options) { 26 | var tag = fig; 27 | 28 | return text.replace(imgRegex, function (match, url, alt, rest) { 29 | return tag.replace('%1', url).replace('%2', alt).replace('%3', alt).replace('%4', alt); 30 | }); 31 | } 32 | } 33 | ]; 34 | }); 35 | })); -------------------------------------------------------------------------------- /docs/video/push_baseline.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/push_baseline.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_assembly_baseline.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_assembly_baseline.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_assembly_baseline_lg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_assembly_baseline_lg.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_assembly_mopa.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_assembly_mopa.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_lift_baseline.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_lift_baseline.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_lift_baseline_lg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_lift_baseline_lg.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_lift_mopa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_lift_mopa.gif -------------------------------------------------------------------------------- /docs/video/sawyer_lift_mopa.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_lift_mopa.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_push_baseline.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_push_baseline.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_push_baseline_lg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_push_baseline_lg.mp4 -------------------------------------------------------------------------------- /docs/video/sawyer_push_mopa.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_push_mopa.gif -------------------------------------------------------------------------------- /docs/video/sawyer_push_mopa.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_push_mopa.mp4 -------------------------------------------------------------------------------- /docs/video/teaser.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/teaser.gif -------------------------------------------------------------------------------- /env/__init__.py: -------------------------------------------------------------------------------- 1 | """ Define all environments. """ 2 | 3 | from gym.envs.registration import register 4 | 5 | 6 | # register all environments to use 7 | register( 8 | id="PusherObstacle-v0", 9 | entry_point="env.pusher:PusherObstacleEnv", 10 | kwargs={}, 11 | ) 12 | 13 | register(id="SawyerAssembly-v0", entry_point="env.sawyer:SawyerAssemblyEnv", kwargs={}) 14 | register( 15 | id="SawyerAssemblyObstacle-v0", 16 | entry_point="env.sawyer:SawyerAssemblyObstacleEnv", 17 | kwargs={}, 18 | ) 19 | 20 | register(id="SawyerLift-v0", entry_point="env.sawyer:SawyerLiftEnv", kwargs={}) 21 | register( 22 | id="SawyerLiftObstacle-v0", 23 | entry_point="env.sawyer:SawyerLiftObstacleEnv", 24 | kwargs={}, 25 | ) 26 | 27 | register(id="SawyerPush-v0", entry_point="env.sawyer:SawyerPushEnv", kwargs={}) 28 | register( 29 | id="SawyerPushObstacle-v0", 30 | entry_point="env.sawyer:SawyerPushObstacleEnv", 31 | kwargs={}, 32 | ) 33 | -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/base.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/base.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/head.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/head.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l0.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l0.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l1.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l2.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l3.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l4.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l4.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l5.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/l6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l6.stl -------------------------------------------------------------------------------- /env/assets/meshes/sawyer/pedestal.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/pedestal.stl -------------------------------------------------------------------------------- /env/assets/meshes/toy_table/0.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/0.stl -------------------------------------------------------------------------------- /env/assets/meshes/toy_table/1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/1.stl -------------------------------------------------------------------------------- /env/assets/meshes/toy_table/2.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/2.stl -------------------------------------------------------------------------------- /env/assets/meshes/toy_table/3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/3.stl -------------------------------------------------------------------------------- /env/assets/meshes/toy_table/4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/4.stl -------------------------------------------------------------------------------- /env/assets/meshes/two_finger_gripper/electric_gripper_base.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/two_finger_gripper/electric_gripper_base.STL -------------------------------------------------------------------------------- /env/assets/meshes/two_finger_gripper/half_round_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/two_finger_gripper/half_round_tip.STL -------------------------------------------------------------------------------- /env/assets/meshes/two_finger_gripper/standard_narrow.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/two_finger_gripper/standard_narrow.STL -------------------------------------------------------------------------------- /env/assets/objects/can-visual.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /env/assets/objects/can.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /env/assets/objects/meshes/can.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/objects/meshes/can.stl -------------------------------------------------------------------------------- /env/assets/textures/can.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/textures/can.png -------------------------------------------------------------------------------- /env/assets/textures/dark-wood.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/textures/dark-wood.png -------------------------------------------------------------------------------- /env/assets/textures/grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/textures/grid.png -------------------------------------------------------------------------------- /env/assets/textures/light-wood.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/textures/light-wood.png -------------------------------------------------------------------------------- /env/assets/xml/common/basic_scene.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 20 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /env/assets/xml/common/camera.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_assembly_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_indicator_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_pick_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 
33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_pick_indicator_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_pick_pos_act.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_pick_target_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_pos_act.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /env/assets/xml/common/gripper_target_chain.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /env/assets/xml/common/materials.xml: 
-------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /env/assets/xml/common/sawyer.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /env/assets/xml/common/sawyer_assembly.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /env/assets/xml/common/sawyer_dependencies.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 84 | -------------------------------------------------------------------------------- /env/assets/xml/common/sawyer_joint_pos_act.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /env/assets/xml/common/sawyer_no_gripper.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /env/assets/xml/common/sawyer_pick.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 
-------------------------------------------------------------------------------- /env/assets/xml/common/skybox.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /env/assets/xml/common/target.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /env/assets/xml/common/visual.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /env/assets/xml/pusher_obstacle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer_assembly.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer_assembly_obstacle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 
18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer_lift.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer_lift_obstacle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer_push.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /env/assets/xml/sawyer_push_obstacle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 
import numpy as np

from env.sawyer.sawyer import SawyerEnv


class SawyerAssemblyEnv(SawyerEnv):
    """Sawyer peg-in-hole assembly task (variant without extra obstacles).

    The arm must bring the peg head (site ``pegHead``) into the hole on the
    toy-table furniture; success fires when the peg head is within 4 cm of
    the ``hole_bottom`` site.  The scene is loaded from
    ``sawyer_assembly.xml`` and rendered from the overhead camera.
    """

    def __init__(self, **kwargs):
        # Force the overhead camera before the base class parses kwargs,
        # then let SawyerEnv load the assembly scene.
        kwargs["camera_name"] = "topview"
        super().__init__("sawyer_assembly.xml", **kwargs)
        self._get_reference()

    def _get_reference(self):
        # No task-specific references beyond what the base class caches.
        super()._get_reference()

    @property
    def dof(self):
        # 7 arm joints; this task exposes no gripper degrees of freedom.
        return 7

    @property
    def init_qpos(self):
        # Nominal arm configuration; _reset() perturbs around this mean.
        return np.array([0.427, 0.13, 0.0557, 0.114, -0.0622, 0.0276, 0.00356])

    def _reset(self):
        """Reset arm joints to init_qpos plus N(0, 0.02^2) noise, zero velocities.

        Returns the observation dict from :meth:`_get_obs`.
        """
        init_qpos = (
            self.init_qpos + self.np_random.randn(self.init_qpos.shape[0]) * 0.02
        )
        self.sim.data.qpos[self.ref_joint_pos_indexes] = init_qpos
        self.sim.data.qvel[self.ref_joint_vel_indexes] = 0.0
        self.sim.forward()

        return self._get_obs()

    def compute_reward(self, action):
        """Compute the assembly reward for the current sim state.

        Two modes, selected by ``self._kwargs["reward_type"]``:
        * ``"dense"``: tanh-shaped penalty on peg-head-to-hole distance.
        * otherwise (sparse/staged): a shaped bonus only once the peg head
          is within 0.3 of the hole, growing as it nears the hole bottom.

        The success bonus (``success_reward``) fires in either mode when the
        peg head is within 0.04 of ``hole_bottom``; it also sets
        ``self._success`` and ``self._terminal``.

        Returns:
            (reward, info) — ``info`` carries ``reward_reach`` only in the
            dense branch.
        """
        info = {}
        reward = 0
        reward_type = self._kwargs["reward_type"]
        pegHeadPos = self.sim.data.get_site_xpos("pegHead")
        hole = self.sim.data.get_site_xpos("hole")
        dist = np.linalg.norm(pegHeadPos - hole)
        hole_bottom = self.sim.data.get_site_xpos("hole_bottom")
        dist_to_hole_bottom = np.linalg.norm(pegHeadPos - hole_bottom)
        if reward_type == "dense":
            reward_reach = np.tanh(-1.5 * dist)
            reward += reward_reach
            info = dict(reward_reach=reward_reach)
        else:
            reward_reach = 0
            if dist < 0.3:
                # Shaping switched from hole-mouth to hole-bottom distance:
                # reward_reach += 0.4 * (1-np.tanh(15*dist))
                reward_reach += 0.4 * (1 - np.tanh(15 * dist_to_hole_bottom))
            reward += reward_reach
        # NOTE(review): source was recovered from a flattened dump; the
        # success check is placed at method level (both reward modes) —
        # confirm against upstream if dense-mode termination matters.
        if dist_to_hole_bottom < 0.04:
            reward += self._kwargs["success_reward"]
            self._success = True
            self._terminal = True

        return reward, info

    def _get_obs(self):
        """Extend the base observation with peg/hole site positions and peg quat."""
        di = super()._get_obs()
        di["hole"] = self.sim.data.get_site_xpos("hole")
        di["pegHead"] = self.sim.data.get_site_xpos("pegHead")
        di["pegEnd"] = self.sim.data.get_site_xpos("pegEnd")
        di["peg_quat"] = self._get_quat("peg")
        return di

    @property
    def static_bodies(self):
        # Bodies treated as immovable for collision checking: the table and
        # every furniture part.
        return ["table", "furniture", "0_part0", "1_part1", "4_part4", "2_part2"]

    @property
    def static_geoms(self):
        return []

    @property
    def static_geom_ids(self):
        """Geom ids belonging to any body listed in ``static_bodies``."""
        body_ids = []
        for body_name in self.static_bodies:
            body_ids.append(self.sim.model.body_name2id(body_name))

        geom_ids = []
        for geom_id, body_id in enumerate(self.sim.model.geom_bodyid):
            if body_id in body_ids:
                geom_ids.append(geom_id)
        return geom_ids

    def _step(self, action, is_planner=False):
        """Advance the simulation one environment step.

        ``action`` is a delta on the arm joint positions.  Planner actions
        are already in joint units and are only clipped; policy actions are
        scaled by ``self._ac_scale`` first.  The inner loop runs the MuJoCo
        simulation at its native dt while applying gravity compensation
        (``qfrc_bias`` copied into ``qfrc_applied``) to the real arm and to
        the indicator arms when present.

        Returns:
            (obs, reward, done, info)
        """
        assert len(action) == self.dof, "environment got invalid action dimension"

        # Planner steps chain from the previously commanded state; policy
        # steps (or the first step) re-anchor on the measured joint positions.
        if not is_planner or self._prev_state is None:
            self._prev_state = self.sim.data.qpos[self.ref_joint_pos_indexes].copy()

        if self._i_term is None:
            # NOTE(review): np.zeros_like on a scalar dof yields a 0-d array;
            # presumably np.zeros(dof) was intended — _i_term is not used
            # below, so this is harmless here. Confirm against the base class.
            self._i_term = np.zeros_like(self.mujoco_robot.dof)

        if is_planner:
            rescaled_ac = np.clip(
                action[: self.robot_dof], -self._ac_scale, self._ac_scale
            )
        else:
            rescaled_ac = np.clip(
                action[: self.robot_dof] * self._ac_scale,
                -self._ac_scale,
                self._ac_scale,
            )
        desired_state = self._prev_state + rescaled_ac

        # One env step spans several physics steps.
        n_inner_loop = int(self._frame_dt / self.dt)
        for _ in range(n_inner_loop):
            # Gravity compensation: cancel bias forces on the controlled arm.
            self.sim.data.qfrc_applied[
                self.ref_joint_vel_indexes
            ] = self.sim.data.qfrc_bias[self.ref_joint_vel_indexes].copy()

            if self.use_robot_indicator:
                self.sim.data.qfrc_applied[
                    self.ref_indicator_joint_pos_indexes
                ] = self.sim.data.qfrc_bias[self.ref_indicator_joint_pos_indexes].copy()

            if self.use_target_robot_indicator:
                self.sim.data.qfrc_applied[
                    self.ref_target_indicator_joint_pos_indexes
                ] = self.sim.data.qfrc_bias[
                    self.ref_target_indicator_joint_pos_indexes
                ].copy()
            self._do_simulation(desired_state)

        self._prev_state = np.copy(desired_state)
        reward, info = self.compute_reward(action)

        return self._get_obs(), reward, self._terminal, info
    @property
    def dof(self):
        # 7 arm joints; this task exposes no gripper degrees of freedom.
        return 7

    @property
    def init_qpos(self):
        # Nominal arm configuration; _reset() perturbs around this mean.
        return np.array([0.427, 0.13, 0.0557, 0.114, -0.0622, 0.0276, 0.00356])

    def _reset(self):
        """Reset arm joints to init_qpos plus N(0, 0.02^2) noise, zero velocities.

        Returns the observation dict from :meth:`_get_obs`.
        """
        init_qpos = (
            self.init_qpos + self.np_random.randn(self.init_qpos.shape[0]) * 0.02
        )
        self.sim.data.qpos[self.ref_joint_pos_indexes] = init_qpos
        self.sim.data.qvel[self.ref_joint_vel_indexes] = 0.0
        self.sim.forward()

        return self._get_obs()

    def compute_reward(self, action):
        """Compute the staged assembly reward for the obstacle variant.

        Shaped reach bonus once the peg head is within 0.3 of the hole,
        growing as it nears the hole mouth; the success bonus fires when
        the peg head is within 0.025 of ``hole_bottom`` (tighter than the
        0.04 used by the obstacle-free variant) and sets ``self._success``
        and ``self._terminal``.

        Returns:
            (reward, info) — ``info`` is always empty here.
        """
        info = {}
        reward = 0
        # NOTE(review): reward_type is read but never used — this variant
        # only implements the staged/sparse reward.
        reward_type = self._kwargs["reward_type"]
        pegHeadPos = self.sim.data.get_site_xpos("pegHead")
        hole = self.sim.data.get_site_xpos("hole")
        dist = np.linalg.norm(pegHeadPos - hole)
        hole_bottom = self.sim.data.get_site_xpos("hole_bottom")
        dist_to_hole_bottom = np.linalg.norm(pegHeadPos - hole_bottom)
        # dist_to_hole duplicates dist above (same site pair).
        dist_to_hole = np.linalg.norm(pegHeadPos - hole)
        reward_reach = 0
        if dist < 0.3:
            reward_reach += 0.4 * (1 - np.tanh(15 * dist_to_hole))
        reward += reward_reach
        if dist_to_hole_bottom < 0.025:
            reward += self._kwargs["success_reward"]
            self._success = True
            self._terminal = True

        return reward, info

    def _get_obs(self):
        """Extend the base observation with peg/hole site positions and peg quat."""
        di = super()._get_obs()
        di["hole"] = self.sim.data.get_site_xpos("hole")
        di["pegHead"] = self.sim.data.get_site_xpos("pegHead")
        di["pegEnd"] = self.sim.data.get_site_xpos("pegEnd")
        di["peg_quat"] = self._get_quat("peg")
        return di

    @property
    def static_bodies(self):
        # Only the table is immovable here; the furniture parts are treated
        # as manipulation bodies (see below), unlike the obstacle-free task.
        return ["table"]

    @property
    def manipulation_bodies(self):
        # Furniture parts the planner must reason about as task objects.
        return ["furniture", "0_part0", "1_part1", "4_part4", "2_part2"]

    @property
    def manipulation_geom_ids(self):
        """Geom ids belonging to any body listed in ``manipulation_bodies``."""
        body_ids = []
        for body_name in self.manipulation_bodies:
            body_ids.append(self.sim.model.body_name2id(body_name))

        geom_ids = []
        for geom_id, body_id in enumerate(self.sim.model.geom_bodyid):
            if body_id in body_ids:
                geom_ids.append(geom_id)
        return geom_ids

    @property
    def static_geoms(self):
        return []

    @property
    def static_geom_ids(self):
        """Geom ids belonging to any body listed in ``static_bodies``."""
        body_ids = []
        for body_name in self.static_bodies:
            body_ids.append(self.sim.model.body_name2id(body_name))

        geom_ids = []
        for geom_id, body_id in enumerate(self.sim.model.geom_bodyid):
            if body_id in body_ids:
                geom_ids.append(geom_id)
        return geom_ids

    def _step(self, action, is_planner=False):
        """Advance the simulation one environment step.

        ``action`` is a delta on the arm joint positions.  Planner actions
        are already in joint units and are only clipped; policy actions are
        scaled by ``self._ac_scale`` first.  The inner loop runs the MuJoCo
        simulation at its native dt while applying gravity compensation
        (``qfrc_bias`` copied into ``qfrc_applied``) to the real arm and to
        the indicator arms when present.

        Returns:
            (obs, reward, done, info)
        """
        assert len(action) == self.dof, "environment got invalid action dimension"

        # Planner steps chain from the previously commanded state; policy
        # steps (or the first step) re-anchor on the measured joint positions.
        if not is_planner or self._prev_state is None:
            self._prev_state = self.sim.data.qpos[self.ref_joint_pos_indexes].copy()

        if self._i_term is None:
            # NOTE(review): np.zeros_like on a scalar dof yields a 0-d array;
            # presumably np.zeros(dof) was intended — _i_term is not used
            # below, so this is harmless here. Confirm against the base class.
            self._i_term = np.zeros_like(self.mujoco_robot.dof)

        if is_planner:
            rescaled_ac = np.clip(
                action[: self.robot_dof], -self._ac_scale, self._ac_scale
            )
        else:
            rescaled_ac = np.clip(
                action[: self.robot_dof] * self._ac_scale,
                -self._ac_scale,
                self._ac_scale,
            )
        desired_state = self._prev_state + rescaled_ac

        # One env step spans several physics steps.
        n_inner_loop = int(self._frame_dt / self.dt)
        for _ in range(n_inner_loop):
            # Gravity compensation: cancel bias forces on the controlled arm.
            self.sim.data.qfrc_applied[
                self.ref_joint_vel_indexes
            ] = self.sim.data.qfrc_bias[self.ref_joint_vel_indexes].copy()

            if self.use_robot_indicator:
                self.sim.data.qfrc_applied[
                    self.ref_indicator_joint_pos_indexes
                ] = self.sim.data.qfrc_bias[self.ref_indicator_joint_pos_indexes].copy()

            if self.use_target_robot_indicator:
                self.sim.data.qfrc_applied[
                    self.ref_target_indicator_joint_pos_indexes
                ] = self.sim.data.qfrc_bias[
                    self.ref_target_indicator_joint_pos_indexes
                ].copy()
            self._do_simulation(desired_state)

        self._prev_state = np.copy(desired_state)
        reward, info = self.compute_reward(action)

        return self._get_obs(), reward, self._terminal, info
KinematicPlanner(std::string xml_filename, std::string algo, int num_actions, std::string opt, double threshold, double _range, std::vector passive_joint_idx, std::vector Glue_bodies, std::vector> ignored_contacts, double contact_threshold, double goal_bias, bool is_simplified, double simplified_duration, int seed); 67 | ~KinematicPlanner(); 68 | std::vector > plan(std::vector start_vec, std::vector goal_vec, double timelimit); 69 | bool isValidState(std::vector state_vec); 70 | std::string getPlannerStatus(); 71 | }; 72 | } 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /motion_planners/include/c_planner.pxd.bak: -------------------------------------------------------------------------------- 1 | from libcpp.string cimport string 2 | from libcpp.vector cimport vector 3 | 4 | cdef extern from "Planner.h" namespace "MotionPlanner": 5 | cdef cppclass Planner: 6 | Planner(string) 7 | string xml_filename 8 | int planning(vector[double], vector[double], double) 9 | -------------------------------------------------------------------------------- /motion_planners/include/compound_state_projector.h: -------------------------------------------------------------------------------- 1 | /// Provide a state projection for compound states. 2 | /// This only uses the elements of the state that are expressed as a vector 3 | /// of doubles. 
4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | class CompoundStateProjector 15 | : public ompl::base::RealVectorRandomLinearProjectionEvaluator { 16 | public: 17 | CompoundStateProjector(const ompl::base::CompoundStateSpace* space, 18 | std::shared_ptr real_space_, 19 | int dim_) 20 | : RealVectorRandomLinearProjectionEvaluator(real_space_, dim_), 21 | real_space(real_space_), 22 | dim(dim_) { 23 | } 24 | 25 | CompoundStateProjector(const ompl::base::CompoundStateSpace* space, 26 | std::shared_ptr real_space_, 27 | std::vector cell_sizes) 28 | : RealVectorRandomLinearProjectionEvaluator(real_space_, cell_sizes), 29 | real_space(real_space_), 30 | dim(cell_sizes.size()) {} 31 | 32 | static std::shared_ptr makeCompoundStateProjector( 33 | const ompl::base::CompoundStateSpace* space) 34 | { 35 | auto real_space = std::make_shared( 36 | space->getDimension()); 37 | // real_space->setBounds(-3.14, 3.14); 38 | // real_space->setBounds(-1, 1); 39 | int dim = 3; 40 | std::vector cell_sizes(dim, 0.1); 41 | auto csp(std::make_shared( 42 | space, real_space, cell_sizes)); 43 | return csp; 44 | } 45 | 46 | unsigned int getDimension() const override { 47 | return dim; 48 | } 49 | 50 | void project( 51 | const ompl::base::State* state, 52 | Eigen::Ref< Eigen::VectorXd > projection) const override 53 | { 54 | auto rv_state = getRealVectorState(state); 55 | 56 | // Use a real space projection 57 | ompl::base::RealVectorRandomLinearProjectionEvaluator::project( 58 | rv_state, projection); 59 | 60 | // Cleanup 61 | real_space->freeState(rv_state); 62 | } 63 | 64 | 65 | private: 66 | ompl::base::State* getRealVectorState(const ompl::base::State* state) const { 67 | // Create a real vector state 68 | std::vector reals; 69 | real_space->copyToReals(reals, state); 70 | auto rv_state = real_space->allocState(); 71 | for(size_t i=0; i < reals.size(); i++) { 72 | rv_state->as() 73 | ->values[i] = reals[i]; 74 | } 75 | 76 | return 
rv_state; 77 | } 78 | 79 | std::shared_ptr real_space; 80 | const int dim; 81 | }; 82 | -------------------------------------------------------------------------------- /motion_planners/include/mujoco_wrapper.h: -------------------------------------------------------------------------------- 1 | // Wrap MuJoCo functionality to make it available to OMPL API 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "mujoco.h" 11 | 12 | struct JointInfo { 13 | std::string name; 14 | int type; 15 | bool limited; 16 | mjtNum range[2]; 17 | int qposadr; 18 | int dofadr; 19 | }; 20 | 21 | std::ostream& operator<<(std::ostream& os, const JointInfo& ji); 22 | 23 | std::vector getJointInfo(const mjModel* m); 24 | 25 | struct StateRange { 26 | bool limited; 27 | mjtNum range[2]; 28 | }; 29 | 30 | StateRange getCtrlRange(const mjModel* m, size_t i); 31 | 32 | struct MuJoCoState { 33 | mjtNum time; 34 | std::vector qpos; 35 | std::vector qvel; 36 | std::vector act; 37 | std::vector ctrl; 38 | }; 39 | 40 | std::ostream& operator<<(std::ostream& os, const MuJoCoState& s); 41 | 42 | 43 | // Put some sanity on the MuJoCo API 44 | class MuJoCo { 45 | public: 46 | MuJoCo(std::string mjkey_filename): 47 | m(0), d(0) { 48 | // mj_activate and mj_deactivate should only be called 49 | // once per program 50 | // mj_instance_count_lock.lock(); 51 | //if (mj_instance_count == 0) { 52 | mj_activate(mjkey_filename.c_str()); 53 | //} 54 | //mj_instance_count += 1; 55 | //mj_instance_count_lock.unlock(); 56 | } 57 | 58 | ~MuJoCo() { 59 | if (d) mj_deleteData(d); 60 | if (m) mj_deleteModel(m); 61 | mj_instance_count_lock.lock(); 62 | mj_instance_count -= 1; 63 | if (mj_instance_count == 0) { 64 | mj_deactivate(); 65 | } 66 | mj_instance_count_lock.unlock(); 67 | } 68 | 69 | // TODO: copy constructor 70 | // TODO: assignment operator 71 | 72 | bool loadXML(std::string filename) { 73 | if (m) mj_deleteModel(m); 74 | char error[1000]; 75 | m = 
mj_loadXML(filename.c_str(), 0, error, 1000); 76 | if (!m) { 77 | std::cerr << error << std::endl; 78 | } 79 | max_timestep = m->opt.timestep; 80 | return m; 81 | } 82 | 83 | bool makeData() { 84 | if (!m) { 85 | std::cerr << "Cannot makeData without a model!" << std::endl; 86 | return false; 87 | } 88 | if (d) mj_deleteData(d); 89 | d = mj_makeData(m); 90 | return d; 91 | } 92 | 93 | 94 | 95 | std::string getJointName(int i) const { 96 | // Avert your eyes of this horror 97 | return std::string(m->names + m->name_jntadr[i]); 98 | } 99 | 100 | std::string getBodyName(int i) const { 101 | return std::string(m->names + m->name_bodyadr[i]); 102 | } 103 | 104 | std::string getActName(int i) const { 105 | return std::string(m->names + m->name_actuatoradr[i]); 106 | } 107 | 108 | /// Set the world to random state within specified limits 109 | /// modifies d->qpos and d->qvel 110 | void setRandomState() { 111 | mj_resetData(m, d); 112 | // Set default states 113 | for (size_t i=0; i < m->nq; i++) { 114 | d->qpos[i] = m->qpos0[i]; 115 | } 116 | 117 | // Set random states within joint limit for DoFs 118 | auto joints = getJointInfo(m); 119 | for (size_t i=0; i < m->nv; i++) { 120 | int joint_id = m->dof_jntid[i]; 121 | int qposadr = m->jnt_qposadr[ joint_id ]; 122 | 123 | mjtNum r = ((mjtNum) rand()) / ((mjtNum) RAND_MAX); 124 | auto lower = joints[joint_id].range[0]; 125 | auto upper = joints[joint_id].range[1]; 126 | if (!joints[joint_id].limited) { 127 | // set range to -pi to pi 128 | lower = -3.1416; 129 | upper = 3.1416; 130 | } 131 | d->qpos[qposadr] = (r * (upper - lower)) + lower; 132 | 133 | // velocity = 0 seem reasonable 134 | d->qvel[i] = 0; 135 | } 136 | } 137 | 138 | void setState(MuJoCoState s) { 139 | d->time = s.time; 140 | for(size_t i=0; i < m->nq; i++) { 141 | if (i >= s.qpos.size()) break; 142 | d->qpos[i] = s.qpos[i]; 143 | } 144 | for(size_t i=0; i < m->nv; i++) { 145 | if (i >= s.qvel.size()) break; 146 | d->qvel[i] = s.qvel[i]; 147 | } 148 | 
for(size_t i=0; i < m->na; i++) { 149 | if (i >= s.act.size()) break; 150 | d->act[i] = s.act[i]; 151 | } 152 | for(size_t i=0; i < m->nu; i++) { 153 | if (i >= s.ctrl.size()) break; 154 | d->ctrl[i] = s.ctrl[i]; 155 | } 156 | } 157 | 158 | MuJoCoState getState() const { 159 | MuJoCoState s; 160 | s.time = d->time; 161 | for(size_t i=0; i < m->nq; i++) { 162 | s.qpos.push_back(d->qpos[i]); 163 | } 164 | for(size_t i=0; i < m->nv; i++) { 165 | s.qvel.push_back(d->qvel[i]); 166 | } 167 | for(size_t i=0; i < m->na; i++) { 168 | s.act.push_back(d->act[i]); 169 | } 170 | for(size_t i=0; i < m->nu; i++) { 171 | s.ctrl.push_back(d->ctrl[i]); 172 | } 173 | return s; 174 | } 175 | 176 | void step() { 177 | mj_step(m, d); 178 | } 179 | 180 | void sim_duration(double duration) { 181 | int steps = ceil(duration / max_timestep); 182 | m->opt.timestep = duration / steps; 183 | for(int i=0; i < steps; i++) { 184 | mj_step(m, d); 185 | } 186 | } 187 | 188 | double getMaxTimestep() const { 189 | return max_timestep; 190 | } 191 | 192 | mjModel* m; 193 | mjData* d; 194 | 195 | private: 196 | double max_timestep; 197 | static int mj_instance_count; 198 | static std::mutex mj_instance_count_lock; 199 | }; 200 | 201 | -------------------------------------------------------------------------------- /motion_planners/include/plan.pxd.bak: -------------------------------------------------------------------------------- 1 | cimport c_planner 2 | from c_planner cimport Planner as CPlanner 3 | 4 | cdef class Planner: 5 | cdef c_planner.Planner *thisptr 6 | cpdef planning(self, start_vec, goal_vec, timelimit) 7 | -------------------------------------------------------------------------------- /motion_planners/planner.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | # distutils: sources = KinematicPlanner.cpp 3 | 4 | from libcpp.string cimport string 5 | from libcpp cimport bool as bool_ 6 | from libcpp.vector cimport 
# C++ declaration of the OMPL-backed kinematic planner implemented in
# KinematicPlanner.cpp. Only the members accessed from Python are declared.
cdef extern from "KinematicPlanner.h" namespace "MotionPlanner":
    cdef cppclass KinematicPlanner:
        # except + converts C++ exceptions from the constructor into Python ones.
        KinematicPlanner(string, string, int, string, double, double, vector[int], vector[string],
                         vector[pair[int, int]], double, double, bool_, double, int) except +
        string xml_filename
        string opt
        int num_actions
        string algo
        double _range
        double threshold
        vector[int] passive_joint_idx
        vector[string] glue_bodies
        vector[pair[int, int]] ignored_contacts
        double contact_threshold
        string planner_status
        bool_ isSimplified
        double simplifiedDuration
        # plan(start, goal, timelimit) -> list of joint-space waypoints.
        vector[vector[double]] plan(vector[double], vector[double], double)
        bool_ isValidState(vector[double])
        string getPlannerStatus()
        int seed

cdef class PyKinematicPlanner:
    """Python-facing wrapper owning a heap-allocated KinematicPlanner.

    NOTE(review): sampling_based_planner.py calls planner.removeCollision(),
    which is not declared/exposed here — confirm against KinematicPlanner.h.
    """
    cdef KinematicPlanner *thisptr

    def __cinit__(self, string xml_filename, string algo, int num_actions, string opt, double threshold,
                  double _range, vector[int] passive_joint_idx, vector[string] glue_bodies,
                  vector[pair[int, int]] ignored_contacts, double contact_threshold, double goal_bias,
                  bool_ is_simplified, double simplified_duration, int seed):

        # Allocate the underlying C++ planner; freed in __dealloc__.
        self.thisptr = new KinematicPlanner(xml_filename, algo, num_actions, opt, threshold, _range,
                                            passive_joint_idx, glue_bodies, ignored_contacts, contact_threshold,
                                            goal_bias, is_simplified, simplified_duration, seed)

    def __dealloc__(self):
        del self.thisptr

    cpdef plan(self, start_vec, goal_vec, timelimit):
        # Returns the raw waypoint list from the C++ planner.
        return self.thisptr.plan(start_vec, goal_vec, timelimit)

    cpdef getPlannerStatus(self):
        # Status string (bytes) describing the last planning attempt.
        return self.thisptr.getPlannerStatus()

    cpdef isValidState(self, state_vec):
        # Collision/limit check for a single joint-space state.
        return self.thisptr.isValidState(state_vec)
class SamplingBasedPlanner:
    """Wrapper around the Cython/OMPL PyKinematicPlanner that handles
    conversion of non-limited (continuous-rotation) joints and unwraps
    planned trajectories so consecutive waypoints never jump across the
    +/-pi boundary."""

    def __init__(
        self,
        config,
        xml_path,
        num_actions,
        non_limited_idx,
        planner_type=None,
        passive_joint_idx=None,
        glue_bodies=None,
        ignored_contacts=None,
        contact_threshold=0.0,
        goal_bias=0.05,
        is_simplified=False,
        simplified_duration=0.1,
        range_=None,
    ):
        """Build the underlying PyKinematicPlanner.

        Bug fix: the list parameters previously defaulted to `[]` (mutable
        default arguments shared across all instances); they now default to
        None and are replaced with fresh lists per call.
        """
        if passive_joint_idx is None:
            passive_joint_idx = []
        if glue_bodies is None:
            glue_bodies = []
        if ignored_contacts is None:
            ignored_contacts = []

        self.config = config
        if planner_type is None:
            planner_type = config.planner_type
        if range_ is None:
            range_ = config.range
        self.planner = PyKinematicPlanner(
            xml_path.encode("utf-8"),
            planner_type.encode("utf-8"),
            num_actions,
            config.planner_objective.encode("utf-8"),
            config.threshold,
            range_,
            passive_joint_idx,
            glue_bodies,
            ignored_contacts,
            contact_threshold,
            goal_bias,
            is_simplified,
            simplified_duration,
            config.seed,
        )
        self.non_limited_idx = non_limited_idx

    def convert_nonlimited(self, state):
        """Wrap the non-limited joint entries of `state` in place via
        util.env.joint_convert and return the state."""
        if self.non_limited_idx is not None:
            for idx in self.non_limited_idx:
                state[idx] = joint_convert(state[idx])
        return state

    def isValidState(self, state):
        """Return whether `state` is valid according to the C++ planner."""
        return self.planner.isValidState(state)

    def plan(self, start, goal, timelimit=1.0):
        """Plan from `start` to `goal` within `timelimit` seconds.

        Returns (unwrapped_traj, raw_states, valid_state, exact). The planner
        signals failure with a constant array: -5 means the start/goal state
        was invalid, -4 means only an approximate solution was found.
        """
        valid_state = True
        exact = True
        converted_start = self.convert_nonlimited(start.copy())
        converted_goal = self.convert_nonlimited(goal.copy())
        states = np.array(self.planner.plan(converted_start, converted_goal, timelimit))

        if np.unique(states).size == 1:
            if states[0][0] == -5:
                valid_state = False
            if states[0][0] == -4:
                exact = False
            return states, states, valid_state, exact

        # Re-anchor the trajectory at the (unconverted) start state and undo
        # the +/-pi wrap-around the planner applies to non-limited joints, so
        # the returned trajectory is continuous.
        traj = [start]
        pre_state = states[0]
        for state in states[1:]:
            tmp_state = traj[-1] + (state - pre_state)
            if self.non_limited_idx is not None:
                for idx in self.non_limited_idx:
                    # A jump larger than ~pi means the planner crossed the
                    # +/-pi seam; take the short way around instead.
                    if abs(state[idx] - pre_state[idx]) > 3.14:
                        if pre_state[idx] > 0 and state[idx] <= 0:
                            tmp_state[idx] = traj[-1][idx] + (
                                3.14 - pre_state[idx] + state[idx] + 3.14
                            )
                        elif pre_state[idx] < 0 and state[idx] > 0:
                            tmp_state[idx] = traj[-1][idx] - (
                                3.14 - state[idx] + pre_state[idx] + 3.14
                            )
            pre_state = state
            traj.append(tmp_state)
        return np.array(traj), states, valid_state, exact

    def remove_collision(self, geom_id, contype, conaffinity):
        """Forward a collision-masking request to the C++ planner.

        NOTE(review): removeCollision is not declared on PyKinematicPlanner in
        planner.pyx as shown — confirm it exists before calling this.
        """
        self.planner.removeCollision(geom_id, contype, conaffinity)

    def get_planner_status(self):
        """Return the planner status as a Python string."""
        return self.planner.getPlannerStatus().decode("utf-8")
from setuptools import setup, find_packages, Extension
from Cython.Build import cythonize
from distutils.sysconfig import get_python_lib
import glob
import os
import sys
import platform

# export MACOSX_DEPLOYMENT_TARGET=10.13

prefix_path = os.environ['HOME']
# Bug fix: os.path.join(prefix_path, '/usr/local/lib/libompl.dylib') silently
# discarded prefix_path, because os.path.join drops earlier components when a
# later one is absolute. Spell the absolute path directly (same result,
# without the misleading join).
ompl_lib_path = '/usr/local/lib/libompl.dylib'
eigen_include_path = '/usr/local/include/eigen3'

# Builds the `planner` extension: Cython wrapper + OMPL/MuJoCo glue code.
extensions = [
    Extension('planner', ['planner.pyx', 'KinematicPlanner.cpp', './src/mujoco_ompl_interface.cpp', './src/mujoco_wrapper.cpp',
                          ],
              include_dirs=["./include/", '/opt/local/include', eigen_include_path, './3rd_party/include/', '/opt/local/include/boost/',
                            os.path.join(prefix_path, '.mujoco/mujoco200/include/'), '/usr/local/include/ompl', '/usr/local/include'],
              extra_objects=[ompl_lib_path, os.path.join(prefix_path, '.mujoco/mujoco200/bin/libmujoco200.dylib')],
              extra_compile_args=['-std=c++11', '-stdlib=libc++'],
              language="c++"),
]
setup(
    name='mujoco-ompl',
    ext_modules=cythonize(extensions),
)
"compound_state_projector.h" 2 | -------------------------------------------------------------------------------- /motion_planners/src/mujoco_wrapper.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "mujoco_wrapper.h" 3 | 4 | using namespace std; 5 | 6 | 7 | ////////////////////////////// 8 | // Init static variables 9 | ////////////////////////////// 10 | 11 | int MuJoCo::mj_instance_count=0; 12 | std::mutex MuJoCo::mj_instance_count_lock; 13 | 14 | ////////////////////////////// 15 | // Define functions 16 | ////////////////////////////// 17 | 18 | std::ostream& operator<<(std::ostream& os, const JointInfo& ji) { 19 | os << "Joint( name: \"" << ji.name << "\", " 20 | << "type: " << ji.type << ", " 21 | << "limited: " << ji.limited << ", " 22 | << "range: (" << ji.range[0] << ", " << ji.range[1] << ") " 23 | << ")"; 24 | return os; 25 | } 26 | 27 | std::ostream& operator<<(std::ostream& os, const MuJoCoState& s) { 28 | os << "{time: " << s.time << ", " 29 | << "qpos: ["; 30 | for(auto const& i : s.qpos) { 31 | os << i << ", "; 32 | } 33 | os << "] qvel: ["; 34 | for(auto const& i : s.qvel) { 35 | os << i << ", "; 36 | } 37 | os << "] act: ["; 38 | for(auto const& i : s.act) { 39 | os << i << ", "; 40 | } 41 | os << "] ctrl: ["; 42 | for(auto const& i : s.ctrl) { 43 | os << i << ", "; 44 | } 45 | os << "]}"; 46 | return os; 47 | } 48 | 49 | std::vector getJointInfo(const mjModel* m) { 50 | std::vector joints; 51 | for (size_t i=0; i < m->njnt; i++) { 52 | JointInfo joint; 53 | joint.name = std::string(m->names + m->name_jntadr[i]); 54 | joint.type = m->jnt_type[i]; 55 | joint.limited = (bool) m->jnt_limited[i]; 56 | joint.range[0] = m->jnt_range[2*i]; 57 | joint.range[1] = m->jnt_range[2*i + 1]; 58 | joint.qposadr = m->jnt_qposadr[i]; 59 | joint.dofadr = m->jnt_dofadr[i]; 60 | joints.push_back(joint); 61 | } 62 | return joints; 63 | } 64 | 65 | StateRange getCtrlRange(const mjModel* m, size_t i) { 66 | 
class BaseAgent(object):
    """Common interface shared by the RL agents (SAC, TD3, ...).

    Subclasses are expected to provide `_actor`, `_ac_space`, `_ob_norm`,
    and `_buffer`, and to implement the NotImplementedError methods.
    """

    def __init__(self, config, ob_space):
        self._config = config

    def normalize(self, ob):
        """Normalize an observation when `config.ob_norm` is enabled."""
        return self._ob_norm.normalize(ob) if self._config.ob_norm else ob

    def act(self, ob, is_train=True, return_stds=False, random_exploration=False):
        """Select an action for `ob`.

        Returns (ac, activation, stds) when exploring randomly or when
        `return_stds` is set, otherwise (ac, activation).
        """
        if random_exploration:
            sampled = self._ac_space.sample()
            # Discrete sub-actions are wrapped in a length-1 array to match
            # the shape the actor produces.
            for key, subspace in self._ac_space.spaces.items():
                if isinstance(subspace, spaces.Discrete):
                    sampled[key] = np.array([sampled[key]])
            return sampled, None, None

        # Delegate to the actor; it returns a 3-tuple iff return_stds is set.
        return self._actor.act(ob, is_train=is_train, return_stds=return_stds)

    def store_episode(self, rollouts):
        raise NotImplementedError()

    def replay_buffer(self):
        """Serializable snapshot of the replay buffer."""
        return self._buffer.state_dict()

    def load_replay_buffer(self, state_dict):
        """Restore the replay buffer from a snapshot."""
        self._buffer.load_state_dict(state_dict)

    def sync_networks(self):
        raise NotImplementedError()

    def train(self):
        raise NotImplementedError()

    def _soft_update_target_network(self, target, source, tau):
        # Polyak averaging; note the unusual convention here: tau weights the
        # *target* parameters, (1 - tau) weights the source.
        for tgt_param, src_param in zip(target.parameters(), source.parameters()):
            tgt_param.data.copy_(tau * tgt_param.data + (1 - tau) * src_param.data)

    def _copy_target_network(self, target, source):
        # Hard copy: target <- source, parameter by parameter.
        for tgt_param, src_param in zip(target.parameters(), source.parameters()):
            tgt_param.data.copy_(src_param.data)
class HERSampler:
    """Hindsight experience replay sampler.

    With the "future" strategy, each sampled transition's goal is replaced
    (with probability `future_p`) by an achieved goal from a later timestep of
    the same episode, and the reward is recomputed with `reward_func`.
    """

    def __init__(self, replay_strategy, replay_k, reward_func=None):
        self.replay_strategy = replay_strategy
        if self.replay_strategy == "future":
            # Bug fix: the original computed `1 - (1.0 / 1 + replay_k)`, which
            # due to operator precedence equals `-replay_k` — a negative
            # probability, so goals were never relabeled. The HER ratio is
            # 1 - 1/(1+k), e.g. k=4 -> 0.8.
            self.future_p = 1 - (1.0 / (1 + replay_k))
        else:
            self.future_p = 0
        self.reward_func = reward_func

    def sample_func(self, episode_batch, batch_size_in_transitions):
        """Sample `batch_size_in_transitions` transitions with HER relabeling.

        `episode_batch` maps keys (must include "ac", "ob", "ag", "g") to
        per-episode lists; returns a dict of stacked arrays including
        "ob_next" and recomputed rewards "r".
        """
        rollout_batch_size = len(episode_batch["ac"])
        batch_size = batch_size_in_transitions

        # select which rollouts and which timesteps to be used
        episode_idxs = np.random.randint(0, rollout_batch_size, batch_size)
        t_samples = [
            np.random.randint(len(episode_batch["ac"][episode_idx]))
            for episode_idx in episode_idxs
        ]

        transitions = {}
        for key in episode_batch.keys():
            transitions[key] = [
                episode_batch[key][episode_idx][t]
                for episode_idx, t in zip(episode_idxs, t_samples)
            ]

        # "ob" has one more entry than "ac", so t+1 is always valid.
        transitions["ob_next"] = [
            episode_batch["ob"][episode_idx][t + 1]
            for episode_idx, t in zip(episode_idxs, t_samples)
        ]
        transitions["r"] = np.zeros((batch_size,))

        # hindsight experience replay
        for i, (episode_idx, t) in enumerate(zip(episode_idxs, t_samples)):
            replace_goal = np.random.uniform() < self.future_p
            if replace_goal:
                future_t = np.random.randint(
                    t + 1, len(episode_batch["ac"][episode_idx]) + 1
                )
                future_ag = episode_batch["ag"][episode_idx][future_t]
                # Only relabel when the current achieved goal has not already
                # reached the future goal (negative reward = not reached).
                if (
                    self.reward_func(
                        episode_batch["ag"][episode_idx][t], future_ag, None
                    )
                    < 0
                ):
                    transitions["g"][i] = future_ag

            # Recompute the reward against the (possibly relabeled) goal.
            transitions["r"][i] = self.reward_func(
                episode_batch["ag"][episode_idx][t + 1], transitions["g"][i], None
            )

        # Stack into arrays; dict-valued entries are stacked per sub-key.
        new_transitions = {}
        for k, v in transitions.items():
            if isinstance(v[0], dict):
                sub_keys = v[0].keys()
                new_transitions[k] = {
                    sub_key: np.stack([v_[sub_key] for v_ in v]) for sub_key in sub_keys
                }
            else:
                new_transitions[k] = np.stack(v)

        return new_transitions
def run(config):
    """Configure this MPI worker (rank, seed, logging, device) and run the
    trainer in train or evaluation mode."""
    rank = MPI.COMM_WORLD.Get_rank()
    config.rank = rank
    config.is_chef = rank == 0
    # Each worker gets a distinct seed offset by its rank.
    config.seed = config.seed + rank
    config.num_workers = MPI.COMM_WORLD.Get_size()
    config.is_mpi = False if config.num_workers == 1 else True

    # Split torch's thread budget fairly across MPI workers.
    if torch.get_num_threads() != 1:
        fair_num_threads = max(
            int(torch.get_num_threads() / MPI.COMM_WORLD.Get_size()), 1
        )
        torch.set_num_threads(fair_num_threads)

    if config.is_chef:
        logger.warning("Running a base worker.")
        make_log_files(config)
    else:
        # Non-chef workers stay quiet so logs are not duplicated per rank.
        logger.warning("Running worker %d and disabling logger", config.rank)
        logger.setLevel(CRITICAL)

    if config.date is None:
        now = datetime.now()
        date = now.strftime("%m.%d")
    else:
        date = config.date
    # Use the pre-offset seed (seed - rank) so all workers share one run name.
    config.run_name = "rl.{}.{}.{}.{}".format(
        config.env, date, config.prefix, config.seed - rank
    )
    if config.group is None:
        config.group = "rl.{}.{}.{}".format(config.env, date, config.prefix)

    config.log_dir = os.path.join(config.log_root_dir, config.run_name)
    if config.is_train:
        config.record_dir = os.path.join(config.log_dir, "video")
    else:
        config.record_dir = os.path.join(config.log_dir, "eval_video")

    def shutdown(signum, frame):
        # Bug fix: the parameter was previously named `signal`, shadowing the
        # `signal` module inside this handler.
        logger.warning("Received signal %s: exiting", signum)
        sys.exit(128 + signum)

    signal.signal(signal.SIGHUP, shutdown)
    signal.signal(signal.SIGINT, shutdown)
    signal.signal(signal.SIGTERM, shutdown)

    # set global seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    # NOTE(review): hard-codes the X display for off-screen rendering —
    # confirm this matches the deployment environment.
    os.environ["DISPLAY"] = ":1"

    if config.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(config.gpu)
        assert torch.cuda.is_available()
        config.device = torch.device("cuda")
    else:
        config.device = torch.device("cpu")

    # build a trainer
    trainer = Trainer(config)
    if config.is_train:
        trainer.train()
        logger.info("Finish training")
    else:
        trainer.evaluate()
        logger.info("Finish evaluating")


def make_log_files(config):
    """Create log/video directories for this run and, when training, record
    the git state, launch command, and full config for reproducibility."""
    if config.date is None:
        now = datetime.now()
        date = now.strftime("%m.%d")
    else:
        date = config.date
    config.run_name = "rl.{}.{}.{}.{}".format(
        config.env, date, config.prefix, config.seed
    )
    if config.group is None:
        config.group = "rl.{}.{}.{}".format(config.env, date, config.prefix)

    config.log_dir = os.path.join(config.log_root_dir, config.run_name)
    logger.info("Create log directory: %s", config.log_dir)
    os.makedirs(config.log_dir, exist_ok=True)

    if config.is_train:
        config.record_dir = os.path.join(config.log_dir, "video")
    else:
        config.record_dir = os.path.join(config.log_dir, "eval_video")
    logger.info("Create video directory: %s", config.record_dir)
    os.makedirs(config.record_dir, exist_ok=True)

    if config.is_train:
        # log git diff and the exact launch command
        cmds = [
            "echo `git rev-parse HEAD` >> {}/git.txt".format(config.log_dir),
            "git diff >> {}/git.txt".format(config.log_dir),
            "echo 'python -m rl.main {}' >> {}/cmd.sh".format(
                " ".join([shlex_quote(arg) for arg in sys.argv[1:]]), config.log_dir
            ),
        ]
        os.system("\n".join(cmds))

        # log config
        param_path = os.path.join(config.log_dir, "params.json")
        logger.info("Store parameters in %s", param_path)
        with open(param_path, "w") as fp:
            json.dump(config.__dict__, fp, indent=4, sort_keys=True)
class PlannerAgent:
    """Thin agent-facing wrapper around SamplingBasedPlanner that reads its
    parameters from the experiment config."""

    def __init__(
        self,
        config,
        ac_space,
        non_limited_idx=None,
        passive_joint_idx=None,
        ignored_contacts=None,
        planner_type=None,
        goal_bias=0.05,
        is_simplified=False,
        simplified_duration=0.1,
        range_=None,
    ):
        """Build the underlying sampling-based planner.

        Bug fix: `passive_joint_idx` and `ignored_contacts` previously
        defaulted to `[]` (mutable default arguments shared across all
        instances); they now default to None.
        """
        if passive_joint_idx is None:
            passive_joint_idx = []
        if ignored_contacts is None:
            ignored_contacts = []

        self._config = config
        self.planner = SamplingBasedPlanner(
            config,
            config._xml_path,
            action_size(ac_space),
            non_limited_idx,
            planner_type=planner_type,
            passive_joint_idx=passive_joint_idx,
            ignored_contacts=ignored_contacts,
            contact_threshold=config.contact_threshold,
            goal_bias=goal_bias,
            is_simplified=is_simplified,
            simplified_duration=simplified_duration,
            range_=range_,
        )

        self._is_simplified = is_simplified
        self._simplified_duration = simplified_duration

    def plan(self, start, goal, timelimit=None, attempts=15):
        """Plan from `start` to `goal`; returns (traj, success, valid, exact).

        On success the first waypoint (the current state) is dropped.
        NOTE(review): `attempts` is accepted but never used — confirm whether
        retrying on failure was intended.
        """
        config = self._config
        if timelimit is None:
            timelimit = config.timelimit
        traj, states, valid, exact = self.planner.plan(start, goal, timelimit)
        success = valid and exact

        if success:
            return traj[1:], success, valid, exact
        else:
            return traj, success, valid, exact

    def get_planner_status(self):
        """Return the planner's status string for the last plan() call."""
        return self.planner.get_planner_status()

    def isValidState(self, state):
        """Check a joint-space state for validity via the planner."""
        return self.planner.isValidState(state)
class AddBias(nn.Module):
    """Adds a learnable, input-independent bias to its input.

    Used by DiagGaussian as a state-independent log-std parameter.
    """

    def __init__(self, bias):
        super().__init__()
        # Stored as (n, 1) so .t() yields a (1, n) row for broadcasting.
        self._bias = nn.Parameter(bias.unsqueeze(1))

    def forward(self, x):
        # Broadcast the bias along the batch (and spatial, for 4-D inputs)
        # dimensions.
        view_shape = (1, -1) if x.dim() == 2 else (1, -1, 1, 1)
        return x + self._bias.t().view(*view_shape)
class GumbelSoftmax(torch.distributions.RelaxedOneHotCategorical):
    """
    A differentiable Categorical distribution using reparametrization trick with Gumbel-Softmax
    Explanation http://amid.fish/assets/gumbel.html
    NOTE: use this in place PyTorch's RelaxedOneHotCategorical distribution since its log_prob is not working right (returns positive values)
    Papers:
    [1] The Concrete Distribution: A Continuous Relaxation of Discrete Random Variables (Maddison et al, 2017)
    [2] Categorical Reparametrization with Gumbel-Softmax (Jang et al, 2017)
    """

    def sample(self, sample_shape=torch.Size()):
        """Gumbel-softmax sampling. Note rsample is inherited from RelaxedOneHotCategorical"""
        # -log(-log(U)) with U ~ Uniform(0,1) is a standard Gumbel sample;
        # argmax over (logits + Gumbel noise) draws an *index* from the
        # underlying categorical (Gumbel-max trick). Non-differentiable.
        u = torch.empty(
            self.logits.size(), device=self.logits.device, dtype=self.logits.dtype
        ).uniform_(0, 1)
        noisy_logits = self.logits - torch.log(-torch.log(u))
        return torch.argmax(noisy_logits, dim=-1)

    def rsample(self, sample_shape=torch.Size()):
        """
        Gumbel-softmax resampling using the Straight-Through trick.
        Credit to Ian Temple for bringing this to our attention. To see standalone code of how this works, refer to https://gist.github.com/yzh119/fd2146d2aeb329d067568a493b20172f
        """
        # Forward pass yields a hard one-hot; the (out - rout).detach() + rout
        # trick routes gradients through the relaxed sample `rout`.
        rout = super().rsample(sample_shape)  # differentiable
        out = F.one_hot(torch.argmax(rout, dim=-1), self.logits.shape[-1]).float()
        return (out - rout).detach() + rout

    def log_prob(self, value):
        """value is one-hot or relaxed"""
        # if self.logits.shape[-1] == 1:
        #     value = torch.zeros_like(value)
        # Integer class indices are converted to one-hot before computing the
        # categorical cross-entropy-style log probability.
        if value.shape != self.logits.shape:
            value = F.one_hot(value.long(), self.logits.shape[-1]).float()
        assert value.shape == self.logits.shape
        return -torch.sum(-value * F.log_softmax(self.logits, -1), -1)  # scaling

    def entropy(self):
        # NOTE(review): reaches into RelaxedOneHotCategorical's private
        # `base_dist._categorical` attribute — may break across torch versions;
        # confirm against the installed torch release.
        return self.base_dist._categorical.entropy()
class MlpActor(Actor):
    """MLP policy: flattens a dict observation and emits, per action
    subspace, a mean head and (for continuous, non-deterministic policies)
    a log-std head."""

    def __init__(
        self,
        config,
        ob_space,
        ac_space,
        tanh_policy,
        deterministic=False,
        activation="relu",
        rl_hid_size=None,
    ):
        """
        Args:
            config: experiment config; this block reads rl_hid_size,
                actor_num_hid_layers, algo, and device from it.
            ob_space: dict-like observation space, flattened to one vector.
            ac_space: gym.spaces.Dict of per-key action subspaces.
            tanh_policy: forwarded to the Actor base class.
            deterministic: if True, no log-std heads are created.
            activation: hidden activation name passed through to MLP.
            rl_hid_size: hidden width; defaults to config.rl_hid_size.
        """
        super().__init__(config, ob_space, ac_space, tanh_policy)

        self._ac_space = ac_space
        self._deterministic = deterministic
        if rl_hid_size is None:  # fix: `is None`, not `== None` (PEP 8)
            rl_hid_size = config.rl_hid_size

        # Shared trunk over the flattened observation.
        input_dim = observation_size(ob_space)

        self.fc = MLP(
            config,
            input_dim,
            rl_hid_size,
            [rl_hid_size] * config.actor_num_hid_layers,
            activation=activation,
        )
        self.fc_means = nn.ModuleDict()
        self.fc_log_stds = nn.ModuleDict()

        # One output head per action subspace.
        for k, space in ac_space.spaces.items():
            if isinstance(space, spaces.Box):
                self.fc_means.update(
                    {
                        k: MLP(
                            config,
                            rl_hid_size,
                            action_size(space),
                            activation=activation,
                        )
                    }
                )
                if not self._deterministic:
                    self.fc_log_stds.update(
                        {
                            k: MLP(
                                config,
                                rl_hid_size,
                                action_size(space),
                                activation=activation,
                            )
                        }
                    )
            elif isinstance(space, spaces.Discrete):
                self.fc_means.update(
                    {k: MLP(config, rl_hid_size, space.n, activation=activation)}
                )
            else:
                # Fallback: `space` is used directly as the output width, so it
                # is presumably already an int here — TODO(review): confirm
                # against callers.
                self.fc_means.update(
                    {k: MLP(config, rl_hid_size, space, activation=activation)}
                )

    def forward(self, ob, deterministic=False):
        """Return (means, stds) OrderedDicts keyed like the action space.

        stds[k] is None for discrete subspaces or deterministic policies.
        """
        inp = list(ob.values())
        if len(inp[0].shape) == 1:
            # Single (unbatched) observation: add a batch dimension.
            inp = [x.unsqueeze(0) for x in inp]

        out = self._activation_fn(self.fc(torch.cat(inp, dim=-1)))
        out = torch.reshape(out, (out.shape[0], -1))

        means, stds = OrderedDict(), OrderedDict()

        for k, space in self._ac_space.spaces.items():
            mean = self.fc_means[k](out)
            if isinstance(space, spaces.Box) and not self._deterministic:
                if self._config.algo == "ppo":
                    # PPO: state-independent log-std (head is fed zeros).
                    zeros = torch.zeros(mean.size()).to(self._config.device)
                    log_std = self.fc_log_stds[k](zeros)
                else:
                    log_std = self.fc_log_stds[k](out)
                log_std = torch.clamp(log_std, -10, 2)
                std = torch.exp(log_std.double())
            else:
                std = None
            means[k] = mean
            stds[k] = std
        return means, stds


class MlpCritic(Critic):
    """MLP value/Q network: flattens ob (and optionally ac) into one vector
    and maps it to a single scalar per batch element."""

    def __init__(
        self, config, ob_space, ac_space=None, activation="relu", rl_hid_size=None
    ):
        super().__init__(config)

        input_dim = observation_size(ob_space)
        if ac_space is not None:
            # Q-function variant: condition on the action as well.
            input_dim += action_size(ac_space)

        if rl_hid_size is None:  # fix: `is None`, not `== None` (PEP 8)
            rl_hid_size = config.rl_hid_size

        self.fc = MLP(config, input_dim, 1, [rl_hid_size] * 2, activation=activation)

    def forward(self, ob, ac=None):
        inp = list(ob.values())
        if len(inp[0].shape) == 1:
            inp = [x.unsqueeze(0) for x in inp]
        if ac is not None:
            ac = list(ac.values())
            if len(ac[0].shape) == 1:
                ac = [x.unsqueeze(0) for x in ac]
            inp.extend(ac)

        out = self.fc(torch.cat(inp, dim=-1))
        out = torch.reshape(out, (out.shape[0], 1))

        return out


# Necessary for my KFAC implementation.
class AddBias(nn.Module):
    """Learnable additive bias kept as its own module (so KFAC can treat it
    as a separate layer)."""

    def __init__(self, bias):
        super(AddBias, self).__init__()
        # Stored as (n, 1) so it can be viewed for both 2-D and 4-D inputs.
        self._bias = nn.Parameter(bias.unsqueeze(1))

    def forward(self, x):
        if x.dim() == 2:
            bias = self._bias.t().view(1, -1)
        else:
            # Conv-style input: broadcast over the spatial dimensions.
            bias = self._bias.t().view(1, -1, 1, 1)

        return x + bias


# ---------------------------------------------------------------------------
# rl/policies/utils.py
# ---------------------------------------------------------------------------
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


class CNN(nn.Module):
    """Conv stack configured by config.kernel_size/stride/conv_dim; the
    forward pass flattens the final feature map."""

    def __init__(self, config, input_dim):
        super().__init__()
        self.activation_fn = nn.ReLU()

        self.convs = nn.ModuleList()
        w = config.img_width
        init_ = lambda m: init(
            m,
            nn.init.orthogonal_,
            lambda x: nn.init.constant_(x, 0),
            nn.init.calculate_gain("relu"),
        )

        for k, s, d in zip(config.kernel_size, config.stride, config.conv_dim):
            self.convs.append(init_(nn.Conv2d(input_dim, d, int(k), int(s))))
            # Output width for a conv with no padding and dilation 1.
            w = int(np.floor((w - (int(k) - 1) - 1) / int(s) + 1))
            input_dim = d

        # screen_width == 32 (8,4)-(3,2) -> 3x3
        # screen_width == 64 (8,4)-(3,2)-(3,2) -> 3x3
        # screen_width == 128 (8,4)-(3,2)-(3,2)-(3,2) -> 3x3
        # screen_width == 256 (8,4)-(3,2)-(3,2)-(3,2) -> 7x7

        print("Output of CNN = %d x %d x %d" % (w, w, d))
        self.w = w
        self.output_size = w * w * d

    def forward(self, ob):
        out = ob
        for conv in self.convs:
            out = self.activation_fn(conv(out))
        out = out.flatten(start_dim=1)
        return out


def fanin_init(tensor):
    """In-place uniform init with bound 1/sqrt(fan_in) (DDPG-style)."""
    size = tensor.size()
    if len(size) == 2:
        fan_in = size[0]
    elif len(size) > 2:
        fan_in = np.prod(size[1:])
    else:
        raise Exception("Shape must be have dimension at least 2.")
    bound = 1.0 / np.sqrt(fan_in)
    return tensor.data.uniform_(-bound, bound)


class MLP(nn.Module):
    """Fully-connected stack: `hid_dims` hidden layers (fanin-initialized,
    bias 0.1) followed by a small-uniform-initialized output layer.

    Note: `config` is accepted for interface compatibility but unused.
    """

    def __init__(
        self,
        config,
        input_dim,
        output_dim,
        hid_dims=(),  # immutable default instead of mutable `[]`
        last_activation=False,
        activation="relu",
    ):
        super().__init__()
        if activation == "relu":
            activation_fn = nn.ReLU()
        elif activation == "tanh":
            activation_fn = nn.Tanh()
        elif activation == "elu":
            # FIX: original tested the misspelled name `acitvation` (NameError
            # whenever this branch was reached) and built `nn.Elu()`, which
            # does not exist — the module is nn.ELU.
            activation_fn = nn.ELU()
        else:
            # FIX: original `return`ed NotImplementedError instead of raising.
            raise NotImplementedError("unsupported activation: %s" % activation)

        # (Removed an `init_` lambda the original defined here but never used.)
        fc = []
        prev_dim = input_dim
        for d in hid_dims:
            fc.append(nn.Linear(prev_dim, d))
            fanin_init(fc[-1].weight)
            fc[-1].bias.data.fill_(0.1)
            fc.append(activation_fn)
            prev_dim = d
        fc.append(nn.Linear(prev_dim, output_dim))
        # Small output init keeps initial actions/values near zero.
        fc[-1].weight.data.uniform_(-1e-3, 1e-3)
        fc[-1].bias.data.uniform_(-1e-3, 1e-3)
        if last_activation:
            fc.append(activation_fn)
        self.fc = nn.Sequential(*fc)

    def forward(self, ob):
        return self.fc(ob)


def init(module, weight_init, bias_init, gain=1):
    """Apply weight_init/bias_init to a module's parameters and return it."""
    weight_init(module.weight.data, gain=gain)
    bias_init(module.bias.data)
    return module


# ---------------------------------------------------------------------------
# scripts/2d/baseline.sh — variable definitions (its invocation continues in
# the next chunk of this dump); preserved here as comments so this chunk
# remains valid Python:
# #!/bin/bash -x
# gpu=$1
# seed=$2
# algo='sac'
# prefix="BASELINE.SAC"
# env="PusherObstacle-v0"
# max_episode_step="400"
# debug="True"
# log_root_dir="./logs"
# reward_scale='10.'
# vis_replay="True"
# success_reward='150.'
# --- scripts/2d/baseline.sh (continuation: invocation; the variables are
# --- defined in the previous chunk of this dump) ---
python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_scale $reward_scale \
    --vis_replay $vis_replay \
    --success_reward $success_reward

# --- scripts/2d/baseline_ik.sh ---
#!/bin/bash -x
gpu=$1
seed=$2
algo='sac'
prefix="BASELINE.SAC.IK"
env="PusherObstacle-v0"
max_episode_step="400"
debug="False"
log_root_dir="./logs"
reward_scale='10.'
vis_replay="True"
success_reward='150.'
use_ik_target="True"
action_range="0.01"

# FIX: removed `--ckpt_interval $ckpt_interval` — ckpt_interval was never
# assigned in this script, so the empty expansion left a bare
# `--ckpt_interval` flag and argparse aborted with "expected one argument".
# (Also dropped the redundant self-assignment `gpu=$gpu`.)
python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_scale $reward_scale \
    --vis_replay $vis_replay \
    --success_reward $success_reward \
    --use_ik_target $use_ik_target \
    --action_range $action_range

# --- scripts/2d/baseline_lg.sh ---
#!/bin/bash -x
gpu=$1
seed=$2
algo='sac'
prefix="BASELINE.SAC.LG"
env="PusherObstacle-v0"
max_episode_step="400"
debug="False"
log_root_dir="./logs"
reward_scale='10.'
vis_replay="True"
success_reward='150.'
expand_ac_space="True"
action_range='1.0'
use_smdp_update="True"

python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_scale $reward_scale \
    --vis_replay $vis_replay \
    --success_reward $success_reward \
    --expand_ac_space $expand_ac_space \
    --action_range $action_range \
    --use_smdp_update $use_smdp_update

# --- scripts/2d/mopa.sh ---
#!/bin/bash -x
gpu=$1
seed=$2

algo='sac'
prefix="MoPA-SAC"
env="PusherObstacle-v0"
max_episode_step="400"
debug="False"
log_root_dir="./logs"
mopa="True"
reward_scale="0.2"
reuse_data="True"
action_range="1.0"
omega='0.5'
use_smdp_update="True"
stochastic_eval="True"
invalid_target_handling="True"
max_reuse_data='30'
ac_space_type="piecewise"
success_reward="150.0"


python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --mopa $mopa \
    --reward_scale $reward_scale \
    --reuse_data $reuse_data \
    --action_range $action_range \
    --omega $omega \
    --success_reward $success_reward \
    --stochastic_eval $stochastic_eval \
    --invalid_target_handling $invalid_target_handling \
    --max_reuse_data $max_reuse_data \
    --ac_space_type $ac_space_type \
    --use_smdp_update $use_smdp_update

# --- scripts/2d/mopa_discrete.sh (contents in the next chunk) ---
# --- scripts/2d/mopa_discrete.sh ---
#!/bin/bash -x
# FIX: shebang was the malformed "#/!/bin/bash -x" — just a comment to the
# kernel, so the script ran under the caller's default shell without -x.
gpu=$1
seed=$2
algo='sac'
prefix="MoPA-SAC.discrete"
env="PusherObstacle-v0"
max_episode_step="400"
debug="False"
reward_type='sparse'
log_root_dir="./logs"
mopa="True"
reward_scale="0.2"
reuse_data="True"
action_range="1.0"
stochastic_eval="True"
invalid_target_handling="True"
max_reuse_data='30'
use_smdp_update="True"
ac_space_type="normal"
success_reward="150.0"
discrete_action="True"


# FIX: removed `--comment $comment` — $comment is never defined in this
# script, so the empty expansion left a bare `--comment` flag and argparse
# aborted with "expected one argument".
#mpiexec -n $workers
python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_type $reward_type \
    --mopa $mopa \
    --reward_scale $reward_scale \
    --reuse_data $reuse_data \
    --action_range $action_range \
    --discrete_action $discrete_action \
    --success_reward $success_reward \
    --stochastic_eval $stochastic_eval \
    --invalid_target_handling $invalid_target_handling \
    --max_reuse_data $max_reuse_data \
    --use_smdp_update $use_smdp_update \
    --ac_space_type $ac_space_type

# --- scripts/2d/mopa_ik.sh (continues in the next chunk) ---
#!/bin/bash -x
# FIX: same malformed "#/!/bin/bash -x" shebang corrected here.
gpu=$1
seed=$2

algo='sac'
prefix="MoPA-SAC.IK"
env="PusherObstacle-v0"
max_episode_step="400"
debug="False"
reward_type='sparse'
log_root_dir="./logs"
mopa="True"
reward_scale="0.2"
reuse_data="True"
action_range="0.1"
stochastic_eval="True"
invalid_target_handling="True"
max_reuse_data='30'
use_smdp_update="True"
success_reward="150.0"
use_ik_target="True"
# --- scripts/2d/mopa_ik.sh (continuation; the variables above were set in
# --- the previous chunk of this dump) ---
ik_target="fingertip"
omega='0.1'

python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_type $reward_type \
    --mopa $mopa \
    --reward_scale $reward_scale \
    --reuse_data $reuse_data \
    --action_range $action_range \
    --success_reward $success_reward \
    --stochastic_eval $stochastic_eval \
    --invalid_target_handling $invalid_target_handling \
    --max_reuse_data $max_reuse_data \
    --use_smdp_update $use_smdp_update \
    --use_ik_target $use_ik_target \
    --ik_target $ik_target \
    --omega $omega

# --- scripts/3d/assembly/baseline.sh ---
#!/bin/bash -x
gpu=$1
seed=$2
prefix="BASELINE"
env="SawyerAssemblyObstacle-v0"
algo='sac'
max_episode_step="250"
debug="False"
reward_type='sparse'
log_root_dir="./logs"
vis_replay="True"
plot_type='3d'
success_reward='150.'
reward_scale="10."

python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_type $reward_type \
    --vis_replay $vis_replay \
    --plot_type $plot_type \
    --success_reward $success_reward \
    --reward_scale $reward_scale

# --- scripts/3d/assembly/baseline_ik.sh ---
#!/bin/bash -x
gpu=$1
seed=$2
prefix="BASELINE.IK"
env="SawyerAssemblyObstacle-v0"
algo='sac'
max_episode_step="250"
debug="False"
reward_type='sparse'
log_root_dir="./logs"
vis_replay="True"
plot_type='3d'
success_reward='150.'
reward_scale="10."
use_ik_target="True"
ik_target="grip_site"
action_range="0.001"

python -m rl.main \
    --log_root_dir $log_root_dir \
    --prefix $prefix \
    --env $env \
    --gpu $gpu \
    --max_episode_step $max_episode_step \
    --debug $debug \
    --algo $algo \
    --seed $seed \
    --reward_type $reward_type \
    --vis_replay $vis_replay \
    --plot_type $plot_type \
    --success_reward $success_reward \
    --reward_scale $reward_scale \
    --use_ik_target $use_ik_target \
    --ik_target $ik_target \
    --action_range $action_range

# --- scripts/3d/assembly/baseline_lg.sh (continues in the next chunk) ---
#!/bin/bash -x
gpu=$1
seed=$2
prefix="BASELINE.SAC.LG"
env="SawyerAssemblyObstacle-v0"
data='08.09'
algo='sac'
max_episode_step="250"
debug="False"
reward_type='sparse'
log_root_dir="./logs"
vis_replay="True"
plot_type='3d'
# --- scripts/3d/assembly/baseline_lg.sh (continuation) — preserved as
# comments so this chunk stays valid Python alongside the quaternion
# utilities below:
#   success_reward='150.'
#   reward_scale="10."
#   action_range="0.5"
#   expand_ac_space='True'
#   use_smdp_update="True"
#
#   python -m rl.main \
#       --log_root_dir $log_root_dir \
#       --prefix $prefix \
#       --env $env \
#       --gpu $gpu \
#       --max_episode_step $max_episode_step \
#       --debug $debug \
#       --algo $algo \
#       --seed $seed \
#       --reward_type $reward_type \
#       --vis_replay $vis_replay \
#       --plot_type $plot_type \
#       --success_reward $success_reward \
#       --reward_scale $reward_scale \
#       --action_range $action_range \
#       --expand_ac_space $expand_ac_space \
#       --use_smdp_update $use_smdp_update

# NOTE(review): the dump jumps here from scripts/3d/assembly/mopa.sh into the
# middle of a quaternion-utility module. The visible tail of quat2axisangle
# (whose `def` line is NOT in this chunk, so it is not reconstructed) read:
#       theta = 2 * np.arcsin(sin_theta)
#       theta *= 1 if quat[0] >= 0 else -1
#       axis = quat[1:] / sin_theta
#       return axis, theta


def quat_to_zangle(quat):
    """Recover the z-angle encoded by zangle_to_quat (its inverse)."""
    reference = quat_create(np.array([0, 1.0, 0]), np.pi / 2)
    relative = quat_mul(quat_inv(reference), quat)
    _, angle = quat2axisangle(relative)
    return angle


def zangle_to_quat(zangle):
    """
    :param zangle in rad
    :return: quaternion
    """
    pitch = quat_create(np.array([0, 1.0, 0]), np.pi / 2)
    roll = quat_create(np.array([-1.0, 0, 0]), zangle)
    return quat_mul(pitch, roll)


def quat_create(axis, angle):
    """
    Create a quaternion from an axis and angle.
    :param axis The three dimensional axis
    :param angle The angle in radians
    :return: A 4-d array containing the components of a quaternion.
    """
    result = np.zeros([4], dtype="float")
    mujoco_py.functions.mju_axisAngle2Quat(result, axis, angle)
    return result


def quat_inv(quat):
    """
    Invert a quaternion, represented by a 4d array.
    :param A quaternion (4-d array). Must not be the zero quaternion
        (all elements equal to zero).
    :return: A 4-d array containing the components of a quaternion.
    """
    # Inverse = conjugate divided by the squared norm.
    inv_norm_sq = 1.0 / np.sum(quat ** 2)
    return inv_norm_sq * np.array([1.0, -1.0, -1.0, -1.0]) * quat


def quat_mul(quat1, quat2):
    """
    Multiply two quaternions, both represented as 4-d arrays.
    """
    product = np.zeros([4], dtype="float")
    mujoco_py.functions.mju_mulQuat(product, quat1, quat2)
    return product