├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── config
├── __init__.py
├── motion_planner.py
├── pusher.py
└── sawyer.py
├── docs
├── content.md
├── img
│ ├── 2D_push.png
│ ├── clvrbanner.png
│ ├── favicon-32x32.png
│ ├── method.png
│ ├── result.png
│ ├── sawyer_assembly.png
│ ├── sawyer_lift.png
│ ├── sawyer_push.png
│ └── teaser.png
├── index.html
├── js
│ └── figure-extension.js
├── theme.css
└── video
│ ├── push_baseline.mp4
│ ├── sawyer_assembly_baseline.mp4
│ ├── sawyer_assembly_baseline_lg.mp4
│ ├── sawyer_assembly_mopa.mp4
│ ├── sawyer_lift_baseline.mp4
│ ├── sawyer_lift_baseline_lg.mp4
│ ├── sawyer_lift_mopa.gif
│ ├── sawyer_lift_mopa.mp4
│ ├── sawyer_push_baseline.mp4
│ ├── sawyer_push_baseline_lg.mp4
│ ├── sawyer_push_mopa.gif
│ ├── sawyer_push_mopa.mp4
│ └── teaser.gif
├── env
├── __init__.py
├── assets
│ ├── meshes
│ │ ├── sawyer
│ │ │ ├── base.stl
│ │ │ ├── head.stl
│ │ │ ├── l0.stl
│ │ │ ├── l1.stl
│ │ │ ├── l2.stl
│ │ │ ├── l3.stl
│ │ │ ├── l4.stl
│ │ │ ├── l5.stl
│ │ │ ├── l6.stl
│ │ │ └── pedestal.stl
│ │ ├── toy_table
│ │ │ ├── 0.stl
│ │ │ ├── 1.stl
│ │ │ ├── 2.stl
│ │ │ ├── 3.stl
│ │ │ └── 4.stl
│ │ └── two_finger_gripper
│ │ │ ├── electric_gripper_base.STL
│ │ │ ├── half_round_tip.STL
│ │ │ └── standard_narrow.STL
│ ├── objects
│ │ ├── can-visual.xml
│ │ ├── can.xml
│ │ └── meshes
│ │ │ └── can.stl
│ ├── textures
│ │ ├── can.png
│ │ ├── dark-wood.png
│ │ ├── grid.png
│ │ └── light-wood.png
│ └── xml
│ │ ├── common
│ │ ├── basic_scene.xml
│ │ ├── camera.xml
│ │ ├── gripper_assembly_chain.xml
│ │ ├── gripper_chain.xml
│ │ ├── gripper_indicator_chain.xml
│ │ ├── gripper_pick_chain.xml
│ │ ├── gripper_pick_indicator_chain.xml
│ │ ├── gripper_pick_pos_act.xml
│ │ ├── gripper_pick_target_chain.xml
│ │ ├── gripper_pos_act.xml
│ │ ├── gripper_target_chain.xml
│ │ ├── materials.xml
│ │ ├── pusher.xml
│ │ ├── pusher_gripper.xml
│ │ ├── sawyer.xml
│ │ ├── sawyer_assembly.xml
│ │ ├── sawyer_assembly_chain.xml
│ │ ├── sawyer_chain.xml
│ │ ├── sawyer_dependencies.xml
│ │ ├── sawyer_indicator_chain.xml
│ │ ├── sawyer_joint_pos_act.xml
│ │ ├── sawyer_no_gripper.xml
│ │ ├── sawyer_no_gripper_chain.xml
│ │ ├── sawyer_no_gripper_indicator_chain.xml
│ │ ├── sawyer_no_gripper_target_chain.xml
│ │ ├── sawyer_pick.xml
│ │ ├── sawyer_pick_chain.xml
│ │ ├── sawyer_pick_indicator_chain.xml
│ │ ├── sawyer_pick_target_chain.xml
│ │ ├── sawyer_target_chain.xml
│ │ ├── skybox.xml
│ │ ├── target.xml
│ │ └── visual.xml
│ │ ├── pusher_obstacle.xml
│ │ ├── sawyer.xml
│ │ ├── sawyer_assembly.xml
│ │ ├── sawyer_assembly_obstacle.xml
│ │ ├── sawyer_lift.xml
│ │ ├── sawyer_lift_obstacle.xml
│ │ ├── sawyer_push.xml
│ │ └── sawyer_push_obstacle.xml
├── base.py
├── inverse_kinematics.py
├── pusher
│ ├── __init__.py
│ └── pusher_obstacle.py
└── sawyer
│ ├── __init__.py
│ ├── sawyer.py
│ ├── sawyer_assembly.py
│ ├── sawyer_assembly_obstacle.py
│ ├── sawyer_lift.py
│ ├── sawyer_lift_obstacle.py
│ ├── sawyer_push.py
│ └── sawyer_push_obstacle.py
├── motion_planners
├── 3rd_party
│ └── include
│ │ └── cxxopts.hpp
├── KinematicPlanner.cpp
├── __init__.py
├── include
│ ├── KinematicPlanner.h
│ ├── c_planner.pxd.bak
│ ├── compound_state_projector.h
│ ├── mujoco_ompl_interface.h
│ ├── mujoco_wrapper.h
│ └── plan.pxd.bak
├── planner.cpp
├── planner.pyx
├── sampling_based_planner.py
├── setup.py
├── setup_macos.py
└── src
│ ├── compound_state_projector.cpp
│ ├── mujoco_ompl_interface.cpp
│ └── mujoco_wrapper.cpp
├── requirements.txt
├── rl
├── __init__.py
├── base_agent.py
├── dataset.py
├── main.py
├── mopa_rollouts.py
├── planner_agent.py
├── policies
│ ├── __init__.py
│ ├── actor_critic.py
│ ├── distributions.py
│ ├── mlp_actor_critic.py
│ └── utils.py
├── rollouts.py
├── sac_agent.py
├── td3_agent.py
└── trainer.py
├── scripts
├── 2d
│ ├── baseline.sh
│ ├── baseline_ik.sh
│ ├── baseline_lg.sh
│ ├── mopa.sh
│ ├── mopa_discrete.sh
│ └── mopa_ik.sh
├── 3d
│ ├── assembly
│ │ ├── baseline.sh
│ │ ├── baseline_ik.sh
│ │ ├── baseline_lg.sh
│ │ ├── mopa.sh
│ │ ├── mopa_discrete.sh
│ │ └── mopa_ik.sh
│ ├── lift
│ │ ├── baseline.sh
│ │ ├── baseline_ik.sh
│ │ ├── baseline_lg.sh
│ │ ├── mopa.sh
│ │ ├── mopa_discrete.sh
│ │ └── mopa_ik.sh
│ └── push
│ │ ├── baseline.sh
│ │ ├── baseline_ik.sh
│ │ ├── baseline_lg.sh
│ │ ├── mopa.sh
│ │ ├── mopa_discrete.sh
│ │ └── mopa_ik.sh
└── misc
│ ├── evaluate_safety.sh
│ └── installEigen.sh
└── util
├── __init__.py
├── contact_info.py
├── env.py
├── gym.py
├── info.py
├── logger.py
├── misc.py
├── mpi.py
├── pytorch.py
├── sawyer_env.py
└── transform_utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Mac OS
2 | .DS_Store
3 | *~
4 | .python-version
5 |
6 | # Log
7 | log
8 | logs
9 |
10 | # Vim
11 | .*.s[a-w][a-z]
12 | Session.vim
13 |
14 | # Data
15 | *.csv
16 | *.ini
17 | *.npy
18 | *.zip
19 | *screenlog*
20 |
21 | # Mujoco
22 | MUJOCO_LOG.TXT
23 |
24 | # Python
25 | ## Byte-compiled / optimized / DLL files
26 | __pycache__/
27 | *.py[cod]
28 | *$py.class
29 |
30 | ## C extensions
31 | #*.so
32 |
33 | ## Distribution / packaging
34 | .Python
35 | build/
36 | *.egg-info/
37 | .installed.cfg
38 | *.egg
39 |
40 | ## Jupyter Notebook
41 | .ipynb_checkpoints
42 | **/*.ipynb
43 |
44 | ## virtualenv
45 | .venv
46 | venv/
47 |
48 | ## Rope project settings
49 | .ropeproject
50 |
51 | ## Wandb
52 | wandb/
53 |
54 | ## PyCharm
55 | .idea/
56 |
57 | ## VSCode
58 | .vscode/
59 |
60 | ## flake8
61 | .flake8
62 | z.vid
63 | z.log.global
64 | test_logs
65 | tmp
66 |
67 | ## Eigen
68 | eigen-3.3.7/
69 |
70 | motion_planners/*.so
71 | ./result
72 | ./plot
73 | # env/assets/xml/sawyer*
74 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Cognitive Learning for Vision and Robotics (CLVR) lab @ USC
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/config/motion_planner.py:
--------------------------------------------------------------------------------
1 | from util import str2bool
2 |
3 |
4 | def add_arguments(parser):
5 | """
6 | Adds a list of arguments to the argparser for the motion planner.
7 | """
8 | # motion planner
9 | parser.add_argument(
10 | "--planner_type",
11 | type=str,
12 | default="rrt_connect",
13 | choices=[
14 | "rrt",
15 | "rrt_connect",
16 | ],
17 | help="planner type",
18 | )
19 | parser.add_argument(
20 | "--simple_planner_type",
21 | type=str,
22 | default="rrt_connect",
23 | choices=[
24 | "rrt",
25 | "rrt_connect",
26 | ],
27 | help="planner type for simple planner",
28 | )
29 | parser.add_argument(
30 | "--planner_objective",
31 | type=str,
32 | default="path_length",
33 | choices=[
34 | "maximize_min_clearance",
35 | "path_length",
36 | "state_cost_integral",
37 | "constraint",
38 | ],
39 | help="planner objective function",
40 | )
41 | parser.add_argument(
42 | "--threshold",
43 | type=float,
44 | default=0.0,
45 | help="threshold for optimization objective",
46 | )
47 | parser.add_argument(
48 | "--is_simplified",
49 | type=str2bool,
50 | default=False,
51 | help="enable simplification of planned trajectory",
52 | )
53 | parser.add_argument(
54 | "--simplified_duration",
55 | type=float,
56 | default=0.01,
57 | help="duration of simplification of planned trajectory",
58 | )
59 | parser.add_argument(
60 | "--simple_planner_simplified",
61 | type=str2bool,
62 | default=False,
63 | help="enable simplification of planned trajectory for simple planner",
64 | )
65 | parser.add_argument(
66 | "--simple_planner_simplified_duration",
67 | type=float,
68 | default=0.01,
69 | help="duration of simplification of planned trajectory for simple planner",
70 | )
71 |
72 |
73 | def get_default_config():
74 | """
75 | Gets default configurations for the motion planner.
76 | """
77 | import argparse
78 |
79 | parser = argparse.ArgumentParser("Default Configuration for Motion Planner")
80 | add_arguments(parser)
81 |
82 | config = parser.parse_args([])
83 | return config
84 |
--------------------------------------------------------------------------------
/config/pusher.py:
--------------------------------------------------------------------------------
1 | from util import str2bool
2 |
3 |
4 | def add_arguments(parser):
5 | """
6 | Adds a list of arguments to argparser for the pusher environment.
7 | """
8 | # pusher
9 | parser.add_argument(
10 | "--reward_type",
11 | type=str,
12 | default="dense",
13 | choices=["dense", "sparse"],
14 | help="reward type",
15 | )
16 | parser.add_argument(
17 | "--distance_threshold",
18 | type=float,
19 | default=0.05,
20 | help="distance threshold for termination",
21 | )
22 | parser.add_argument(
23 | "--max_episode_steps",
24 | type=int,
25 | default=150,
26 | help="maximum timesteps in an episode",
27 | )
28 |
29 | # observations
30 | parser.add_argument(
31 | "--screen_width", type=int, default=500, help="width of camera image"
32 | )
33 | parser.add_argument(
34 | "--screen_height", type=int, default=500, help="height of camera image"
35 | )
36 | parser.add_argument(
37 | "--frame_skip", type=int, default=1, help="Numer of skip frames"
38 | )
39 | parser.add_argument(
40 | "--kp", type=float, default=150.0, help="p term for a PID controller"
41 | )
42 | parser.add_argument(
43 | "--kd", type=float, default=20.0, help="d term for a PID controller"
44 | )
45 | parser.add_argument(
46 | "--ki", type=float, default=0.1, help="i term for a PID controller"
47 | )
48 | parser.add_argument(
49 | "--frame_dt", type=float, default=1.0, help="delta t between each frame"
50 | )
51 | parser.add_argument(
52 | "--ctrl_reward_coef", type=float, default=0, help="control reward coefficient"
53 | )
54 | parser.add_argument(
55 | "--success_reward", type=float, default=150.0, help="completion reward"
56 | )
57 | parser.add_argument(
58 | "--camera_name", type=str, default="cam0", help="camera name in an environment"
59 | )
60 | parser.add_argument(
61 | "--range", type=float, default=0.2, help="range of motion planner"
62 | )
63 | parser.add_argument(
64 | "--simple_planner_range",
65 | type=float,
66 | default=0.1,
67 | help="range of simple motion planner",
68 | )
69 | parser.add_argument(
70 | "--timelimit", type=float, default=1.0, help="timelimit for planning"
71 | )
72 | parser.add_argument(
73 | "--simple_planner_timelimit",
74 | type=float,
75 | default=0.02,
76 | help="timelimit for planning by simple motion planner",
77 | )
78 | parser.add_argument(
79 | "--contact_threshold",
80 | type=float,
81 | default=-0.0015,
82 | help="depth threshold for contact",
83 | )
84 | parser.add_argument(
85 | "--joint_margin", type=float, default=0.0, help="margin of each joint"
86 | )
87 | parser.add_argument(
88 | "--step_size",
89 | type=float,
90 | default=0.04,
91 | help="step size for invalid target handling",
92 | )
93 |
94 |
95 | def get_default_config():
96 | """
97 | Gets default configurations for the pusher environment.
98 | """
99 | import argparse
100 |
101 | parser = argparse.ArgumentParser("Default Configuration for 2D Pusher Environment")
102 | add_arguments(parser)
103 |
104 | parser.add_argument("--seed", type=int, default=1234, help="random seed")
105 | parser.add_argument("--debug", type=str2bool, default=False, help="enable debugging")
106 |
107 | config = parser.parse_args([])
108 | return config
109 |
--------------------------------------------------------------------------------
/config/sawyer.py:
--------------------------------------------------------------------------------
1 | from util import str2bool
2 |
3 |
4 | def add_arguments(parser):
5 | """
6 | Adds a list of arguments to argparser for the sawyer environment.
7 | """
8 | # sawyer
9 | parser.add_argument(
10 | "--reward_type",
11 | type=str,
12 | default="dense",
13 | choices=["dense", "sparse"],
14 | help="reward type",
15 | )
16 | parser.add_argument(
17 | "--distance_threshold",
18 | type=float,
19 | default=0.06,
20 | help="distance threshold for termination",
21 | )
22 | parser.add_argument(
23 | "--max_episode_steps",
24 | type=int,
25 | default=250,
26 | help="maximum timesteps in an episode",
27 | )
28 | parser.add_argument(
29 | "--screen_width", type=int, default=500, help="width of camera image"
30 | )
31 | parser.add_argument(
32 | "--screen_height", type=int, default=500, help="height of camera image"
33 | )
34 | parser.add_argument(
35 | "--camera_name",
36 | type=str,
37 | default="visview",
38 | help="camera name in an environment",
39 | )
40 |
41 | # observations
42 | parser.add_argument(
43 | "--frame_skip", type=int, default=1, help="Numer of skip frames"
44 | )
45 | parser.add_argument(
46 | "--action_repeat", type=int, default=5, help="number of action repeats"
47 | )
48 | parser.add_argument(
49 | "--ctrl_reward_coef", type=float, default=0, help="control reward coefficient"
50 | )
51 |
52 | parser.add_argument(
53 | "--kp", type=float, default=40.0, help="p term for a PID controller"
54 | ) # 150.)
55 | parser.add_argument(
56 | "--kd", type=float, default=8.0, help="d term for a PID controller"
57 | ) # 20.)
58 | parser.add_argument(
59 | "--ki", type=float, default=0.0, help="i term for a PID controller"
60 | )
61 | parser.add_argument(
62 | "--frame_dt", type=float, default=0.15, help="delta t between each frame"
63 | ) # 0.1)
64 | parser.add_argument(
65 | "--use_robot_indicator",
66 | type=str2bool,
67 | default=True,
68 | help="enable visualization of robot indicator for motion planner",
69 | )
70 | parser.add_argument(
71 | "--use_target_robot_indicator",
72 | type=str2bool,
73 | default=True,
74 | help="enable visualization of robot indicator for target position of motion planner",
75 | )
76 | parser.add_argument(
77 | "--success_reward", type=float, default=150.0, help="completion reward"
78 | )
79 | parser.add_argument(
80 | "--range", type=float, default=0.1, help="range of motion planner"
81 | )
82 | parser.add_argument(
83 | "--simple_planner_range",
84 | type=float,
85 | default=0.05,
86 | help="range of simple motion planner",
87 | )
88 | parser.add_argument(
89 | "--timelimit", type=float, default=1.0, help="timelimit for motion planner"
90 | )
91 | parser.add_argument(
92 | "--simple_planner_timelimit",
93 | type=float,
94 | default=0.05,
95 | help="timelimit for simple motion planner",
96 | )
97 | parser.add_argument(
98 | "--contact_threshold",
99 | type=float,
100 | default=-0.002,
101 | help="depth thredhold for contact",
102 | )
103 | parser.add_argument(
104 | "--joint_margin", type=float, default=0.001, help="marin of each joint"
105 | )
106 | parser.add_argument(
107 | "--step_size",
108 | type=float,
109 | default=0.02,
110 | help="step size for invalid target handling",
111 | )
112 |
113 |
114 | def get_default_config():
115 | """
116 | Gets default configurations for the Sawyer environment.
117 | """
118 | import argparse
119 |
120 | parser = argparse.ArgumentParser("Default Configuration for Sawyer Environment")
121 | add_arguments(parser)
122 |
123 | parser.add_argument("--seed", type=int, default=1234, help="random seed")
124 | parser.add_argument("--debug", type=str2bool, default=False, help="enable debugging")
125 |
126 | config = parser.parse_args([])
127 | return config
128 |
--------------------------------------------------------------------------------
/docs/content.md:
--------------------------------------------------------------------------------
1 |
2 | ----
3 |
4 | ## Overview
5 |
6 | 
7 |
8 | To solve tasks in obstructed environments, we propose motion planner augmented reinforcement learning (MoPA-RL). Our framework consists of an RL policy and a motion planner. The motion planner is integrated into the RL policy by enlarging the action space: if an action sampled from the RL policy lies within the original action space, the agent executes it directly in the environment; otherwise, the motion planner computes a path that moves the agent to the faraway target (a minimal sketch of this switching rule follows the list below). MoPA-RL has three benefits:
9 |
10 | - Add motion planning capabilities to any RL agent with joint space control
11 | - Allow an agent to freely switch between motion planning and direct action execution by controlling the scale of its action
12 | - Naturally learn collision-avoiding trajectories by leveraging motion planning
13 |
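Below is a minimal sketch of the switching rule described above. It is illustrative only: the function name, the `planner.plan` interface, and the `direct_limit` threshold are assumptions, and the actual logic presumably lives in `rl/planner_agent.py` and `rl/mopa_rollouts.py`. Small actions are executed as a single environment step, while large actions are interpreted as goal joint displacements and executed by following the planned path.

```python
import numpy as np

def mopa_step(env, planner, qpos, action, direct_limit=0.1):
    """Execute one MoPA action: direct step if small, planned path otherwise."""
    if np.max(np.abs(action)) <= direct_limit:
        # the action lies inside the original (direct) action space
        return env.step(action)
    # large action: interpret it as a goal joint displacement and follow the
    # collision-free joint-space path returned by the motion planner
    obs, total_reward, done, info = None, 0.0, False, {}
    for waypoint in planner.plan(qpos, qpos + action):
        obs, reward, done, info = env.step(waypoint - qpos)
        total_reward += reward
        qpos = waypoint
        if done:
            break
    return obs, total_reward, done, info
```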
14 |
15 | ----
16 |
17 | ## Videos
18 |
19 | Sawyer Push
20 | The Sawyer arm is required to find a path to reach an object inside a box and push it to a goal position.
21 |
22 |
23 |
24 |
25 |
26 |
SAC
27 |
28 |
29 |
30 |
31 |
32 |
SAC Large
33 |
34 |
35 |
36 |
37 |
38 |
MoPA-SAC (Ours)
39 |
40 |
41 | Sawyer Lift
42 | The Sawyer arm needs to find a path into a box, grasp a can, and take it out of the box.
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
SAC
51 |
52 |
53 |
54 |
55 |
56 |
SAC Large
57 |
58 |
59 |
60 |
61 |
62 |
MoPA-SAC (Ours)
63 |
64 |
65 |
66 |
67 | Sawyer Assembly
68 | The Sawyer arm, with a table leg attached, needs to avoid the other legs to reach a hole in the table and insert the leg to assemble the table.
69 |
70 |
71 |
72 |
73 |
74 |
SAC
75 |
76 |
77 |
78 |
79 |
80 |
SAC Large
81 |
82 |
83 |
84 |
85 |
86 |
MoPA-SAC (Ours)
87 |
88 |
89 |
90 | ----
91 |
92 | ## Quantitative results
93 |
94 |
95 |
96 |
97 |
98 |
99 |
Success rates of our MoPA-SAC (green) and several baselines, averaged over 4 seeds. Our approach leverages the motion planner to converge with fewer environment steps than the baselines. Both SAC and ours are trained for the same number of environment steps.
100 |
101 |
102 |
103 |
104 |
105 | ----
106 |
107 | ## Citation
108 | ```
109 | @inproceedings{yamada2020mopa,
110 | title={Motion Planner Augmented Reinforcement Learning for Obstructed Environments},
111 | author={Jun Yamada and Youngwoon Lee and Gautam Salhotra and Karl Pertsch and Max Pflueger and Gaurav S. Sukhatme and Joseph J. Lim and Peter Englert},
112 | booktitle={Conference on Robot Learning},
113 | year={2020},
114 | }
115 | ```
116 |
--------------------------------------------------------------------------------
/docs/img/2D_push.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/2D_push.png
--------------------------------------------------------------------------------
/docs/img/clvrbanner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/clvrbanner.png
--------------------------------------------------------------------------------
/docs/img/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/favicon-32x32.png
--------------------------------------------------------------------------------
/docs/img/method.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/method.png
--------------------------------------------------------------------------------
/docs/img/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/result.png
--------------------------------------------------------------------------------
/docs/img/sawyer_assembly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/sawyer_assembly.png
--------------------------------------------------------------------------------
/docs/img/sawyer_lift.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/sawyer_lift.png
--------------------------------------------------------------------------------
/docs/img/sawyer_push.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/sawyer_push.png
--------------------------------------------------------------------------------
/docs/img/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/img/teaser.png
--------------------------------------------------------------------------------
/docs/js/figure-extension.js:
--------------------------------------------------------------------------------
1 | (function (extension) {
2 | if (typeof showdown !== 'undefined') {
3 | // global (browser or nodejs global)
4 | extension(showdown);
5 | } else if (typeof define === 'function' && define.amd) {
6 | // AMD
7 | define(['showdown'], extension);
8 | } else if (typeof exports === 'object') {
9 | // Node, CommonJS-like
10 | module.exports = extension(require('showdown'));
11 | } else {
12 | // showdown was not found so we throw
13 | throw Error('Could not find showdown library');
14 | }
15 | }(function (showdown) {
16 |
17 | var fig = '' + ' ' + '%3 ' + ' ';
18 | var imgRegex = /(?:)?(?:<\/p>)?/gi;
19 |
20 | // loading extension into showdown
21 | showdown.extension('figure', function () {
22 | return [
23 | {
24 | type: 'output',
25 | filter: function (text, converter, options) {
26 | var tag = fig;
27 |
28 | return text.replace(imgRegex, function (match, url, alt, rest) {
29 | return tag.replace('%1', url).replace('%2', alt).replace('%3', alt).replace('%4', alt);
30 | });
31 | }
32 | }
33 | ];
34 | });
35 | }));
--------------------------------------------------------------------------------
/docs/video/push_baseline.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/push_baseline.mp4
--------------------------------------------------------------------------------
/docs/video/sawyer_assembly_baseline.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_assembly_baseline.mp4
--------------------------------------------------------------------------------
/docs/video/sawyer_assembly_baseline_lg.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_assembly_baseline_lg.mp4
--------------------------------------------------------------------------------
/docs/video/sawyer_assembly_mopa.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_assembly_mopa.mp4
--------------------------------------------------------------------------------
/docs/video/sawyer_lift_baseline.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_lift_baseline.mp4
--------------------------------------------------------------------------------
/docs/video/sawyer_lift_baseline_lg.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_lift_baseline_lg.mp4
--------------------------------------------------------------------------------
/docs/video/sawyer_lift_mopa.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_lift_mopa.gif
--------------------------------------------------------------------------------
/docs/video/sawyer_lift_mopa.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_lift_mopa.mp4
--------------------------------------------------------------------------------
/docs/video/sawyer_push_baseline.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_push_baseline.mp4
--------------------------------------------------------------------------------
/docs/video/sawyer_push_baseline_lg.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_push_baseline_lg.mp4
--------------------------------------------------------------------------------
/docs/video/sawyer_push_mopa.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_push_mopa.gif
--------------------------------------------------------------------------------
/docs/video/sawyer_push_mopa.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/sawyer_push_mopa.mp4
--------------------------------------------------------------------------------
/docs/video/teaser.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/docs/video/teaser.gif
--------------------------------------------------------------------------------
/env/__init__.py:
--------------------------------------------------------------------------------
1 | """ Define all environments. """
2 |
3 | from gym.envs.registration import register
4 |
5 |
6 | # register all environments to use
7 | register(
8 | id="PusherObstacle-v0",
9 | entry_point="env.pusher:PusherObstacleEnv",
10 | kwargs={},
11 | )
12 |
13 | register(id="SawyerAssembly-v0", entry_point="env.sawyer:SawyerAssemblyEnv", kwargs={})
14 | register(
15 | id="SawyerAssemblyObstacle-v0",
16 | entry_point="env.sawyer:SawyerAssemblyObstacleEnv",
17 | kwargs={},
18 | )
19 |
20 | register(id="SawyerLift-v0", entry_point="env.sawyer:SawyerLiftEnv", kwargs={})
21 | register(
22 | id="SawyerLiftObstacle-v0",
23 | entry_point="env.sawyer:SawyerLiftObstacleEnv",
24 | kwargs={},
25 | )
26 |
27 | register(id="SawyerPush-v0", entry_point="env.sawyer:SawyerPushEnv", kwargs={})
28 | register(
29 | id="SawyerPushObstacle-v0",
30 | entry_point="env.sawyer:SawyerPushObstacleEnv",
31 | kwargs={},
32 | )
33 |
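The registrations above make each task available through `gym.make` once this package is imported. Below is a hypothetical usage sketch, not taken from the repository's training code (which appears to live in `rl/main.py`); whether every default config field is accepted by the environment constructor is an assumption here.

```python
# Hypothetical usage sketch (assumes the old-style gym API used by this repo).
import gym

import env  # importing the package runs the register() calls above
from config.pusher import get_default_config

config = get_default_config()
# gym.make forwards extra keyword arguments to the environment constructor
pusher_env = gym.make("PusherObstacle-v0", **vars(config))
obs = pusher_env.reset()
```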
--------------------------------------------------------------------------------
/env/assets/meshes/sawyer/base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/base.stl
--------------------------------------------------------------------------------
/env/assets/meshes/sawyer/head.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/head.stl
--------------------------------------------------------------------------------
/env/assets/meshes/sawyer/l0.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l0.stl
--------------------------------------------------------------------------------
/env/assets/meshes/sawyer/l1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l1.stl
--------------------------------------------------------------------------------
/env/assets/meshes/sawyer/l2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l2.stl
--------------------------------------------------------------------------------
/env/assets/meshes/sawyer/l3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l3.stl
--------------------------------------------------------------------------------
/env/assets/meshes/sawyer/l4.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l4.stl
--------------------------------------------------------------------------------
/env/assets/meshes/sawyer/l5.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l5.stl
--------------------------------------------------------------------------------
/env/assets/meshes/sawyer/l6.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/l6.stl
--------------------------------------------------------------------------------
/env/assets/meshes/sawyer/pedestal.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/sawyer/pedestal.stl
--------------------------------------------------------------------------------
/env/assets/meshes/toy_table/0.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/0.stl
--------------------------------------------------------------------------------
/env/assets/meshes/toy_table/1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/1.stl
--------------------------------------------------------------------------------
/env/assets/meshes/toy_table/2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/2.stl
--------------------------------------------------------------------------------
/env/assets/meshes/toy_table/3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/3.stl
--------------------------------------------------------------------------------
/env/assets/meshes/toy_table/4.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/toy_table/4.stl
--------------------------------------------------------------------------------
/env/assets/meshes/two_finger_gripper/electric_gripper_base.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/two_finger_gripper/electric_gripper_base.STL
--------------------------------------------------------------------------------
/env/assets/meshes/two_finger_gripper/half_round_tip.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/two_finger_gripper/half_round_tip.STL
--------------------------------------------------------------------------------
/env/assets/meshes/two_finger_gripper/standard_narrow.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/meshes/two_finger_gripper/standard_narrow.STL
--------------------------------------------------------------------------------
/env/assets/objects/can-visual.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/env/assets/objects/can.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/env/assets/objects/meshes/can.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/objects/meshes/can.stl
--------------------------------------------------------------------------------
/env/assets/textures/can.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/textures/can.png
--------------------------------------------------------------------------------
/env/assets/textures/dark-wood.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/textures/dark-wood.png
--------------------------------------------------------------------------------
/env/assets/textures/grid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/textures/grid.png
--------------------------------------------------------------------------------
/env/assets/textures/light-wood.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/env/assets/textures/light-wood.png
--------------------------------------------------------------------------------
/env/assets/xml/common/basic_scene.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
20 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/env/assets/xml/common/camera.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/env/assets/xml/common/gripper_assembly_chain.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/env/assets/xml/common/gripper_chain.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/env/assets/xml/common/gripper_indicator_chain.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/env/assets/xml/common/gripper_pick_chain.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
--------------------------------------------------------------------------------
/env/assets/xml/common/gripper_pick_indicator_chain.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/env/assets/xml/common/gripper_pick_pos_act.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/env/assets/xml/common/gripper_pick_target_chain.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/env/assets/xml/common/gripper_pos_act.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/env/assets/xml/common/gripper_target_chain.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/env/assets/xml/common/materials.xml:
--------------------------------------------------------------------------------
1 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/env/assets/xml/common/sawyer.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/env/assets/xml/common/sawyer_assembly.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/env/assets/xml/common/sawyer_dependencies.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/env/assets/xml/common/sawyer_joint_pos_act.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/env/assets/xml/common/sawyer_no_gripper.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/env/assets/xml/common/sawyer_pick.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/env/assets/xml/common/skybox.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
7 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/env/assets/xml/common/target.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/env/assets/xml/common/visual.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/env/assets/xml/pusher_obstacle.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/env/assets/xml/sawyer.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/env/assets/xml/sawyer_assembly.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/env/assets/xml/sawyer_assembly_obstacle.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
--------------------------------------------------------------------------------
/env/assets/xml/sawyer_lift.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/env/assets/xml/sawyer_lift_obstacle.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/env/assets/xml/sawyer_push.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/env/assets/xml/sawyer_push_obstacle.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/env/pusher/__init__.py:
--------------------------------------------------------------------------------
1 | from env.pusher.pusher_obstacle import PusherObstacleEnv
2 |
--------------------------------------------------------------------------------
/env/sawyer/__init__.py:
--------------------------------------------------------------------------------
1 | from env.sawyer.sawyer_push import SawyerPushEnv
2 | from env.sawyer.sawyer_push_obstacle import SawyerPushObstacleEnv
3 | from env.sawyer.sawyer_lift import SawyerLiftEnv
4 | from env.sawyer.sawyer_assembly import SawyerAssemblyEnv
5 | from env.sawyer.sawyer_assembly_obstacle import SawyerAssemblyObstacleEnv
6 |
7 | from env.sawyer.sawyer_lift_obstacle import SawyerLiftObstacleEnv
8 |
--------------------------------------------------------------------------------
/env/sawyer/sawyer_assembly.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from env.sawyer.sawyer import SawyerEnv
4 |
5 |
6 | class SawyerAssemblyEnv(SawyerEnv):
7 | def __init__(self, **kwargs):
8 | kwargs["camera_name"] = "topview"
9 | super().__init__("sawyer_assembly.xml", **kwargs)
10 | self._get_reference()
11 |
12 | def _get_reference(self):
13 | super()._get_reference()
14 |
15 | @property
16 | def dof(self):
17 | return 7
18 |
19 | @property
20 | def init_qpos(self):
21 | return np.array([0.427, 0.13, 0.0557, 0.114, -0.0622, 0.0276, 0.00356])
22 |
23 | def _reset(self):
24 | init_qpos = (
25 | self.init_qpos + self.np_random.randn(self.init_qpos.shape[0]) * 0.02
26 | )
27 | self.sim.data.qpos[self.ref_joint_pos_indexes] = init_qpos
28 | self.sim.data.qvel[self.ref_joint_vel_indexes] = 0.0
29 | self.sim.forward()
30 |
31 | return self._get_obs()
32 |
33 | def compute_reward(self, action):
34 | info = {}
35 | reward = 0
36 | reward_type = self._kwargs["reward_type"]
37 | pegHeadPos = self.sim.data.get_site_xpos("pegHead")
38 | hole = self.sim.data.get_site_xpos("hole")
39 | dist = np.linalg.norm(pegHeadPos - hole)
40 | hole_bottom = self.sim.data.get_site_xpos("hole_bottom")
41 | dist_to_hole_bottom = np.linalg.norm(pegHeadPos - hole_bottom)
42 | if reward_type == "dense":
43 | reward_reach = np.tanh(-1.5 * dist)
44 | reward += reward_reach
45 | info = dict(reward_reach=reward_reach)
46 | else:
47 | reward_reach = 0
48 | if dist < 0.3:
49 | # reward_reach += 0.4 * (1-np.tanh(15*dist))
50 | reward_reach += 0.4 * (1 - np.tanh(15 * dist_to_hole_bottom))
51 | reward += reward_reach
52 | if dist_to_hole_bottom < 0.04:
53 | reward += self._kwargs["success_reward"]
54 | self._success = True
55 | self._terminal = True
56 |
57 | return reward, info
58 |
59 | def _get_obs(self):
60 | di = super()._get_obs()
61 | di["hole"] = self.sim.data.get_site_xpos("hole")
62 | di["pegHead"] = self.sim.data.get_site_xpos("pegHead")
63 | di["pegEnd"] = self.sim.data.get_site_xpos("pegEnd")
64 | di["peg_quat"] = self._get_quat("peg")
65 | return di
66 |
67 | @property
68 | def static_bodies(self):
69 | return ["table", "furniture", "0_part0", "1_part1", "4_part4", "2_part2"]
70 |
71 | @property
72 | def static_geoms(self):
73 | return []
74 |
75 | @property
76 | def static_geom_ids(self):
77 | body_ids = []
78 | for body_name in self.static_bodies:
79 | body_ids.append(self.sim.model.body_name2id(body_name))
80 |
81 | geom_ids = []
82 | for geom_id, body_id in enumerate(self.sim.model.geom_bodyid):
83 | if body_id in body_ids:
84 | geom_ids.append(geom_id)
85 | return geom_ids
86 |
87 | def _step(self, action, is_planner=False):
88 | """
89 | Applies the policy (or planner) action as a desired joint displacement, steps the simulation with gravity compensation, and computes the reward.
90 | """
91 | assert len(action) == self.dof, "environment got invalid action dimension"
92 |
93 | if not is_planner or self._prev_state is None:
94 | self._prev_state = self.sim.data.qpos[self.ref_joint_pos_indexes].copy()
95 |
96 | if self._i_term is None:
97 | self._i_term = np.zeros_like(self.mujoco_robot.dof)
98 |
99 | if is_planner:
100 | rescaled_ac = np.clip(
101 | action[: self.robot_dof], -self._ac_scale, self._ac_scale
102 | )
103 | else:
104 | rescaled_ac = np.clip(
105 | action[: self.robot_dof] * self._ac_scale,
106 | -self._ac_scale,
107 | self._ac_scale,
108 | )
109 | desired_state = self._prev_state + rescaled_ac
110 |
111 | n_inner_loop = int(self._frame_dt / self.dt)
112 | for _ in range(n_inner_loop):
113 | self.sim.data.qfrc_applied[
114 | self.ref_joint_vel_indexes
115 | ] = self.sim.data.qfrc_bias[self.ref_joint_vel_indexes].copy()
116 |
117 | if self.use_robot_indicator:
118 | self.sim.data.qfrc_applied[
119 | self.ref_indicator_joint_pos_indexes
120 | ] = self.sim.data.qfrc_bias[self.ref_indicator_joint_pos_indexes].copy()
121 |
122 | if self.use_target_robot_indicator:
123 | self.sim.data.qfrc_applied[
124 | self.ref_target_indicator_joint_pos_indexes
125 | ] = self.sim.data.qfrc_bias[
126 | self.ref_target_indicator_joint_pos_indexes
127 | ].copy()
128 | self._do_simulation(desired_state)
129 |
130 | self._prev_state = np.copy(desired_state)
131 | reward, info = self.compute_reward(action)
132 |
133 | return self._get_obs(), reward, self._terminal, info
134 |
--------------------------------------------------------------------------------
/env/sawyer/sawyer_assembly_obstacle.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from env.sawyer.sawyer import SawyerEnv
4 |
5 |
6 | class SawyerAssemblyObstacleEnv(SawyerEnv):
7 | def __init__(self, **kwargs):
8 | super().__init__("sawyer_assembly_obstacle.xml", **kwargs)
9 | self._get_reference()
10 |
11 | def _get_reference(self):
12 | super()._get_reference()
13 |
14 | @property
15 | def dof(self):
16 | return 7
17 |
18 | @property
19 | def init_qpos(self):
20 | return np.array([0.427, 0.13, 0.0557, 0.114, -0.0622, 0.0276, 0.00356])
21 |
22 | def _reset(self):
23 | init_qpos = (
24 | self.init_qpos + self.np_random.randn(self.init_qpos.shape[0]) * 0.02
25 | )
26 | self.sim.data.qpos[self.ref_joint_pos_indexes] = init_qpos
27 | self.sim.data.qvel[self.ref_joint_vel_indexes] = 0.0
28 | self.sim.forward()
29 |
30 | return self._get_obs()
31 |
32 | def compute_reward(self, action):
33 | info = {}
34 | reward = 0
35 | reward_type = self._kwargs["reward_type"]
36 | pegHeadPos = self.sim.data.get_site_xpos("pegHead")
37 | hole = self.sim.data.get_site_xpos("hole")
38 | dist = np.linalg.norm(pegHeadPos - hole)
39 | hole_bottom = self.sim.data.get_site_xpos("hole_bottom")
40 | dist_to_hole_bottom = np.linalg.norm(pegHeadPos - hole_bottom)
41 | dist_to_hole = np.linalg.norm(pegHeadPos - hole)
42 | reward_reach = 0
43 | if dist < 0.3:
44 | reward_reach += 0.4 * (1 - np.tanh(15 * dist_to_hole))
45 | reward += reward_reach
46 | if dist_to_hole_bottom < 0.025:
47 | reward += self._kwargs["success_reward"]
48 | self._success = True
49 | self._terminal = True
50 |
51 | return reward, info
52 |
53 | def _get_obs(self):
54 | di = super()._get_obs()
55 | di["hole"] = self.sim.data.get_site_xpos("hole")
56 | di["pegHead"] = self.sim.data.get_site_xpos("pegHead")
57 | di["pegEnd"] = self.sim.data.get_site_xpos("pegEnd")
58 | di["peg_quat"] = self._get_quat("peg")
59 | return di
60 |
61 | @property
62 | def static_bodies(self):
63 | return ["table"]
64 |
65 | @property
66 | def manipulation_bodies(self):
67 | return ["furniture", "0_part0", "1_part1", "4_part4", "2_part2"]
68 |
69 | @property
70 | def manipulation_geom_ids(self):
71 | body_ids = []
72 | for body_name in self.manipulation_bodies:
73 | body_ids.append(self.sim.model.body_name2id(body_name))
74 |
75 | geom_ids = []
76 | for geom_id, body_id in enumerate(self.sim.model.geom_bodyid):
77 | if body_id in body_ids:
78 | geom_ids.append(geom_id)
79 | return geom_ids
80 |
81 | @property
82 | def static_geoms(self):
83 | return []
84 |
85 | @property
86 | def static_geom_ids(self):
87 | body_ids = []
88 | for body_name in self.static_bodies:
89 | body_ids.append(self.sim.model.body_name2id(body_name))
90 |
91 | geom_ids = []
92 | for geom_id, body_id in enumerate(self.sim.model.geom_bodyid):
93 | if body_id in body_ids:
94 | geom_ids.append(geom_id)
95 | return geom_ids
96 |
97 | def _step(self, action, is_planner=False):
98 | """
99 | Applies the policy (or planner) action as a desired joint displacement, steps the simulation with gravity compensation, and computes the reward.
100 | """
101 | assert len(action) == self.dof, "environment got invalid action dimension"
102 |
103 | if not is_planner or self._prev_state is None:
104 | self._prev_state = self.sim.data.qpos[self.ref_joint_pos_indexes].copy()
105 |
106 | if self._i_term is None:
107 | self._i_term = np.zeros_like(self.mujoco_robot.dof)
108 |
109 | if is_planner:
110 | rescaled_ac = np.clip(
111 | action[: self.robot_dof], -self._ac_scale, self._ac_scale
112 | )
113 | else:
114 | rescaled_ac = np.clip(
115 | action[: self.robot_dof] * self._ac_scale,
116 | -self._ac_scale,
117 | self._ac_scale,
118 | )
119 | desired_state = self._prev_state + rescaled_ac
120 |
121 | n_inner_loop = int(self._frame_dt / self.dt)
122 | for _ in range(n_inner_loop):
123 | self.sim.data.qfrc_applied[
124 | self.ref_joint_vel_indexes
125 | ] = self.sim.data.qfrc_bias[self.ref_joint_vel_indexes].copy()
126 |
127 | if self.use_robot_indicator:
128 | self.sim.data.qfrc_applied[
129 | self.ref_indicator_joint_pos_indexes
130 | ] = self.sim.data.qfrc_bias[self.ref_indicator_joint_pos_indexes].copy()
131 |
132 | if self.use_target_robot_indicator:
133 | self.sim.data.qfrc_applied[
134 | self.ref_target_indicator_joint_pos_indexes
135 | ] = self.sim.data.qfrc_bias[
136 | self.ref_target_indicator_joint_pos_indexes
137 | ].copy()
138 | self._do_simulation(desired_state)
139 |
140 | self._prev_state = np.copy(desired_state)
141 | reward, info = self.compute_reward(action)
142 |
143 | return self._get_obs(), reward, self._terminal, info
144 |
--------------------------------------------------------------------------------
/motion_planners/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/motion_planners/__init__.py
--------------------------------------------------------------------------------
/motion_planners/include/KinematicPlanner.h:
--------------------------------------------------------------------------------
1 | #ifndef _KinematicPlanner_h
2 | #define _KinematicPlanner_h
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 |
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include
19 | #include
20 |
21 | #include
22 |
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 | #include
29 |
30 | #include
31 | #include "mujoco_wrapper.h"
32 | #include "mujoco_ompl_interface.h"
33 |
34 | namespace ob = ompl::base;
35 | namespace oc = ompl::control;
36 | namespace og = ompl::geometric;
37 |
38 | namespace MotionPlanner
39 | {
40 | class KinematicPlanner
41 | {
42 | public:
43 | std::string xml_filename;
44 | std::string algo;
45 | int num_actions;
46 | std::string opt;
47 | double threshold;
48 | double _range;
49 | std::string mjkey_filename;
50 | std::shared_ptr mj;
51 | std::shared_ptr si;
52 | std::shared_ptr mj_state_prop;
53 | std::shared_ptr rrt_planner;
54 | std::shared_ptr rrt_connect_planner;
55 | std::shared_ptr ss;
56 | std::shared_ptr msvc;
57 | std::shared_ptr psimp_;
58 |         std::vector<int> passive_joint_idx;
59 |         std::vector<std::string> glue_bodies;
60 |         std::vector<std::pair<int, int>> ignored_contacts;
61 | std::string planner_status;
62 | bool isSimplified;
63 | double simplifiedDuration;
64 | int seed;
65 |
66 |         KinematicPlanner(std::string xml_filename, std::string algo, int num_actions, std::string opt, double threshold, double _range, std::vector<int> passive_joint_idx, std::vector<std::string> glue_bodies, std::vector<std::pair<int, int>> ignored_contacts, double contact_threshold, double goal_bias, bool is_simplified, double simplified_duration, int seed);
67 |         ~KinematicPlanner();
68 |         std::vector<std::vector<double>> plan(std::vector<double> start_vec, std::vector<double> goal_vec, double timelimit);
69 |         bool isValidState(std::vector<double> state_vec);
70 | std::string getPlannerStatus();
71 | };
72 | }
73 |
74 | #endif
75 |
--------------------------------------------------------------------------------
/motion_planners/include/c_planner.pxd.bak:
--------------------------------------------------------------------------------
1 | from libcpp.string cimport string
2 | from libcpp.vector cimport vector
3 |
4 | cdef extern from "Planner.h" namespace "MotionPlanner":
5 | cdef cppclass Planner:
6 | Planner(string)
7 | string xml_filename
8 | int planning(vector[double], vector[double], double)
9 |
--------------------------------------------------------------------------------
/motion_planners/include/compound_state_projector.h:
--------------------------------------------------------------------------------
1 | /// Provide a state projection for compound states.
2 | /// This only uses the elements of the state that are expressed as a vector
3 | /// of doubles.
4 |
5 | #pragma once
6 |
7 | #include
8 | #include
9 |
10 | #include
11 | #include
12 | #include
13 |
14 | class CompoundStateProjector
15 | : public ompl::base::RealVectorRandomLinearProjectionEvaluator {
16 | public:
17 | CompoundStateProjector(const ompl::base::CompoundStateSpace* space,
18 |             std::shared_ptr<ompl::base::RealVectorStateSpace> real_space_,
19 | int dim_)
20 | : RealVectorRandomLinearProjectionEvaluator(real_space_, dim_),
21 | real_space(real_space_),
22 | dim(dim_) {
23 | }
24 |
25 | CompoundStateProjector(const ompl::base::CompoundStateSpace* space,
26 |             std::shared_ptr<ompl::base::RealVectorStateSpace> real_space_,
27 |             std::vector<double> cell_sizes)
28 | : RealVectorRandomLinearProjectionEvaluator(real_space_, cell_sizes),
29 | real_space(real_space_),
30 | dim(cell_sizes.size()) {}
31 |
32 |     static std::shared_ptr<CompoundStateProjector> makeCompoundStateProjector(
33 | const ompl::base::CompoundStateSpace* space)
34 | {
35 |         auto real_space = std::make_shared<ompl::base::RealVectorStateSpace>(
36 | space->getDimension());
37 | // real_space->setBounds(-3.14, 3.14);
38 | // real_space->setBounds(-1, 1);
39 | int dim = 3;
40 |         std::vector<double> cell_sizes(dim, 0.1);
41 |         auto csp(std::make_shared<CompoundStateProjector>(
42 | space, real_space, cell_sizes));
43 | return csp;
44 | }
45 |
46 | unsigned int getDimension() const override {
47 | return dim;
48 | }
49 |
50 | void project(
51 | const ompl::base::State* state,
52 | Eigen::Ref< Eigen::VectorXd > projection) const override
53 | {
54 | auto rv_state = getRealVectorState(state);
55 |
56 | // Use a real space projection
57 | ompl::base::RealVectorRandomLinearProjectionEvaluator::project(
58 | rv_state, projection);
59 |
60 | // Cleanup
61 | real_space->freeState(rv_state);
62 | }
63 |
64 |
65 | private:
66 | ompl::base::State* getRealVectorState(const ompl::base::State* state) const {
67 | // Create a real vector state
68 |         std::vector<double> reals;
69 | real_space->copyToReals(reals, state);
70 | auto rv_state = real_space->allocState();
71 | for(size_t i=0; i < reals.size(); i++) {
72 |             rv_state->as<ompl::base::RealVectorStateSpace::StateType>()
73 | ->values[i] = reals[i];
74 | }
75 |
76 | return rv_state;
77 | }
78 |
79 |     std::shared_ptr<ompl::base::RealVectorStateSpace> real_space;
80 | const int dim;
81 | };
82 |
--------------------------------------------------------------------------------
/motion_planners/include/mujoco_wrapper.h:
--------------------------------------------------------------------------------
1 | // Wrap MuJoCo functionality to make it available to OMPL API
2 |
3 | #pragma once
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | #include "mujoco.h"
11 |
12 | struct JointInfo {
13 | std::string name;
14 | int type;
15 | bool limited;
16 | mjtNum range[2];
17 | int qposadr;
18 | int dofadr;
19 | };
20 |
21 | std::ostream& operator<<(std::ostream& os, const JointInfo& ji);
22 |
23 | std::vector<JointInfo> getJointInfo(const mjModel* m);
24 |
25 | struct StateRange {
26 | bool limited;
27 | mjtNum range[2];
28 | };
29 |
30 | StateRange getCtrlRange(const mjModel* m, size_t i);
31 |
32 | struct MuJoCoState {
33 | mjtNum time;
34 |     std::vector<mjtNum> qpos;
35 |     std::vector<mjtNum> qvel;
36 |     std::vector<mjtNum> act;
37 |     std::vector<mjtNum> ctrl;
38 | };
39 |
40 | std::ostream& operator<<(std::ostream& os, const MuJoCoState& s);
41 |
42 |
43 | // Put some sanity on the MuJoCo API
44 | class MuJoCo {
45 | public:
46 | MuJoCo(std::string mjkey_filename):
47 | m(0), d(0) {
48 | // mj_activate and mj_deactivate should only be called
49 | // once per program
50 | // mj_instance_count_lock.lock();
51 | //if (mj_instance_count == 0) {
52 | mj_activate(mjkey_filename.c_str());
53 | //}
54 | //mj_instance_count += 1;
55 | //mj_instance_count_lock.unlock();
56 | }
57 |
58 | ~MuJoCo() {
59 | if (d) mj_deleteData(d);
60 | if (m) mj_deleteModel(m);
61 | mj_instance_count_lock.lock();
62 | mj_instance_count -= 1;
63 | if (mj_instance_count == 0) {
64 | mj_deactivate();
65 | }
66 | mj_instance_count_lock.unlock();
67 | }
68 |
69 | // TODO: copy constructor
70 | // TODO: assignment operator
71 |
72 | bool loadXML(std::string filename) {
73 | if (m) mj_deleteModel(m);
74 | char error[1000];
75 | m = mj_loadXML(filename.c_str(), 0, error, 1000);
76 | if (!m) {
77 |             std::cerr << error << std::endl; return false;
78 | }
79 | max_timestep = m->opt.timestep;
80 | return m;
81 | }
82 |
83 | bool makeData() {
84 | if (!m) {
85 | std::cerr << "Cannot makeData without a model!" << std::endl;
86 | return false;
87 | }
88 | if (d) mj_deleteData(d);
89 | d = mj_makeData(m);
90 | return d;
91 | }
92 |
93 |
94 |
95 | std::string getJointName(int i) const {
96 | // Avert your eyes of this horror
97 | return std::string(m->names + m->name_jntadr[i]);
98 | }
99 |
100 | std::string getBodyName(int i) const {
101 | return std::string(m->names + m->name_bodyadr[i]);
102 | }
103 |
104 | std::string getActName(int i) const {
105 | return std::string(m->names + m->name_actuatoradr[i]);
106 | }
107 |
108 | /// Set the world to random state within specified limits
109 | /// modifies d->qpos and d->qvel
110 | void setRandomState() {
111 | mj_resetData(m, d);
112 | // Set default states
113 | for (size_t i=0; i < m->nq; i++) {
114 | d->qpos[i] = m->qpos0[i];
115 | }
116 |
117 | // Set random states within joint limit for DoFs
118 | auto joints = getJointInfo(m);
119 | for (size_t i=0; i < m->nv; i++) {
120 | int joint_id = m->dof_jntid[i];
121 | int qposadr = m->jnt_qposadr[ joint_id ];
122 |
123 | mjtNum r = ((mjtNum) rand()) / ((mjtNum) RAND_MAX);
124 | auto lower = joints[joint_id].range[0];
125 | auto upper = joints[joint_id].range[1];
126 | if (!joints[joint_id].limited) {
127 | // set range to -pi to pi
128 | lower = -3.1416;
129 | upper = 3.1416;
130 | }
131 | d->qpos[qposadr] = (r * (upper - lower)) + lower;
132 |
133 | // velocity = 0 seem reasonable
134 | d->qvel[i] = 0;
135 | }
136 | }
137 |
138 | void setState(MuJoCoState s) {
139 | d->time = s.time;
140 | for(size_t i=0; i < m->nq; i++) {
141 | if (i >= s.qpos.size()) break;
142 | d->qpos[i] = s.qpos[i];
143 | }
144 | for(size_t i=0; i < m->nv; i++) {
145 | if (i >= s.qvel.size()) break;
146 | d->qvel[i] = s.qvel[i];
147 | }
148 | for(size_t i=0; i < m->na; i++) {
149 | if (i >= s.act.size()) break;
150 | d->act[i] = s.act[i];
151 | }
152 | for(size_t i=0; i < m->nu; i++) {
153 | if (i >= s.ctrl.size()) break;
154 | d->ctrl[i] = s.ctrl[i];
155 | }
156 | }
157 |
158 | MuJoCoState getState() const {
159 | MuJoCoState s;
160 | s.time = d->time;
161 | for(size_t i=0; i < m->nq; i++) {
162 | s.qpos.push_back(d->qpos[i]);
163 | }
164 | for(size_t i=0; i < m->nv; i++) {
165 | s.qvel.push_back(d->qvel[i]);
166 | }
167 | for(size_t i=0; i < m->na; i++) {
168 | s.act.push_back(d->act[i]);
169 | }
170 | for(size_t i=0; i < m->nu; i++) {
171 | s.ctrl.push_back(d->ctrl[i]);
172 | }
173 | return s;
174 | }
175 |
176 | void step() {
177 | mj_step(m, d);
178 | }
179 |
180 | void sim_duration(double duration) {
181 | int steps = ceil(duration / max_timestep);
182 | m->opt.timestep = duration / steps;
183 | for(int i=0; i < steps; i++) {
184 | mj_step(m, d);
185 | }
186 | }
187 |
188 | double getMaxTimestep() const {
189 | return max_timestep;
190 | }
191 |
192 | mjModel* m;
193 | mjData* d;
194 |
195 | private:
196 | double max_timestep;
197 | static int mj_instance_count;
198 | static std::mutex mj_instance_count_lock;
199 | };
200 |
201 |
--------------------------------------------------------------------------------
/motion_planners/include/plan.pxd.bak:
--------------------------------------------------------------------------------
1 | cimport c_planner
2 | from c_planner cimport Planner as CPlanner
3 |
4 | cdef class Planner:
5 | cdef c_planner.Planner *thisptr
6 | cpdef planning(self, start_vec, goal_vec, timelimit)
7 |
--------------------------------------------------------------------------------
/motion_planners/planner.pyx:
--------------------------------------------------------------------------------
1 | # distutils: language = c++
2 | # distutils: sources = KinematicPlanner.cpp
3 |
4 | from libcpp.string cimport string
5 | from libcpp cimport bool as bool_
6 | from libcpp.vector cimport vector
7 | from libcpp.pair cimport pair
8 |
9 | cdef extern from "KinematicPlanner.h" namespace "MotionPlanner":
10 | cdef cppclass KinematicPlanner:
11 | KinematicPlanner(string, string, int, string, double, double, vector[int], vector[string],
12 | vector[pair[int, int]], double, double, bool_, double, int) except +
13 | string xml_filename
14 | string opt
15 | int num_actions
16 | string algo
17 | double _range
18 | double threshold
19 | vector[int] passive_joint_idx
20 | vector[string] glue_bodies
21 | vector[pair[int, int]] ignored_contacts
22 | double contact_threshold
23 | string planner_status
24 | bool_ isSimplified
25 | double simplifiedDuration
26 | vector[vector[double]] plan(vector[double], vector[double], double)
27 | bool_ isValidState(vector[double])
28 | string getPlannerStatus()
29 | int seed
30 |
31 | cdef class PyKinematicPlanner:
32 | cdef KinematicPlanner *thisptr
33 | def __cinit__(self, string xml_filename, string algo, int num_actions, string opt, double threshold,
34 | double _range, vector[int] passive_joint_idx, vector[string] glue_bodies,
35 | vector[pair[int, int]] ignored_contacts, double contact_threshold, double goal_bias,
36 | bool_ is_simplified, double simplified_duration, int seed):
37 |
38 | self.thisptr = new KinematicPlanner(xml_filename, algo, num_actions, opt, threshold, _range,
39 | passive_joint_idx, glue_bodies, ignored_contacts, contact_threshold,
40 | goal_bias, is_simplified, simplified_duration, seed)
41 |
42 | def __dealloc__(self):
43 | del self.thisptr
44 |
45 | cpdef plan(self, start_vec, goal_vec, timelimit):
46 | return self.thisptr.plan(start_vec, goal_vec, timelimit)
47 |
48 | cpdef getPlannerStatus(self):
49 | return self.thisptr.getPlannerStatus()
50 |
51 | cpdef isValidState(self, state_vec):
52 | return self.thisptr.isValidState(state_vec)
53 |
54 |
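A minimal usage sketch of the compiled extension (not part of the repository); it mirrors the constructor call made in motion_planners/sampling_based_planner.py below. The XML path, the algorithm and objective strings, and all numeric values are illustrative placeholders; the accepted algorithm/objective names are defined in KinematicPlanner.cpp, which is not shown here.

    import numpy as np
    from motion_planners.planner import PyKinematicPlanner

    # String arguments must be byte strings because the wrapper expects std::string.
    planner = PyKinematicPlanner(
        b"env/assets/xml/pusher_obstacle.xml",  # illustrative MJCF path
        b"rrt_connect",        # planner algorithm (assumed name)
        7,                     # number of planned joints
        b"path_length",        # optimization objective (assumed name)
        0.1,                   # threshold
        1.0,                   # _range
        [],                    # passive_joint_idx
        [],                    # glue_bodies
        [],                    # ignored_contacts
        0.0,                   # contact_threshold
        0.05,                  # goal_bias
        False,                 # is_simplified
        0.1,                   # simplified_duration
        1234,                  # seed
    )
    states = np.array(planner.plan(np.zeros(7), 0.3 * np.ones(7), 1.0))
    print(planner.getPlannerStatus().decode("utf-8"))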
--------------------------------------------------------------------------------
/motion_planners/sampling_based_planner.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 |
3 | import numpy as np
4 | import subprocess
5 | from threading import Lock, Thread
6 | import yaml
7 | from motion_planners.planner import PyKinematicPlanner
8 | from util.env import joint_convert
9 |
10 |
11 | class SamplingBasedPlanner:
12 | def __init__(
13 | self,
14 | config,
15 | xml_path,
16 | num_actions,
17 | non_limited_idx,
18 | planner_type=None,
19 | passive_joint_idx=[],
20 | glue_bodies=[],
21 | ignored_contacts=[],
22 | contact_threshold=0.0,
23 | goal_bias=0.05,
24 | is_simplified=False,
25 | simplified_duration=0.1,
26 | range_=None,
27 | ):
28 | self.config = config
29 | if planner_type is None:
30 | planner_type = config.planner_type
31 | if range_ is None:
32 | range_ = config.range
33 | self.planner = PyKinematicPlanner(
34 | xml_path.encode("utf-8"),
35 | planner_type.encode("utf-8"),
36 | num_actions,
37 | config.planner_objective.encode("utf-8"),
38 | config.threshold,
39 | range_,
40 | passive_joint_idx,
41 | glue_bodies,
42 | ignored_contacts,
43 | contact_threshold,
44 | goal_bias,
45 | is_simplified,
46 | simplified_duration,
47 | config.seed,
48 | )
49 | self.non_limited_idx = non_limited_idx
50 |
51 | def convert_nonlimited(self, state):
52 | if self.non_limited_idx is not None:
53 | for idx in self.non_limited_idx:
54 | state[idx] = joint_convert(state[idx])
55 | return state
56 |
57 | def isValidState(self, state):
58 | return self.planner.isValidState(state)
59 |
60 | def plan(self, start, goal, timelimit=1.0):
61 | valid_state = True
62 | exact = True
63 | converted_start = self.convert_nonlimited(start.copy())
64 | converted_goal = self.convert_nonlimited(goal.copy())
65 | states = np.array(self.planner.plan(converted_start, converted_goal, timelimit))
66 |
67 | if np.unique(states).size == 1:
68 | if states[0][0] == -5:
69 | valid_state = False
70 | if states[0][0] == -4:
71 | exact = False
72 | return states, states, valid_state, exact
73 |
74 | traj = [start]
75 | pre_state = states[0]
76 | for _, state in enumerate(states[1:]):
77 | # converted_pre_state = self.convert_nonlimited(pre_state.copy())
78 | tmp_state = traj[-1] + (state - pre_state)
79 | if self.non_limited_idx is not None:
80 | for idx in self.non_limited_idx:
81 | if abs(state[idx] - pre_state[idx]) > 3.14:
82 | if pre_state[idx] > 0 and state[idx] <= 0:
83 | # if traj[-1][idx] < 0:
84 | tmp_state[idx] = traj[-1][idx] + (
85 | 3.14 - pre_state[idx] + state[idx] + 3.14
86 | )
87 | # else:
88 | # tmp_state[idx] = traj[-1][idx] - (3.14-pre_state[idx] + state[idx] + 3.14)
89 | # tmp_state[idx] = traj[-1][idx] + 3.14 + state[idx]
90 | elif pre_state[idx] < 0 and state[idx] > 0:
91 | # if traj[-1][idx] < 0:
92 | tmp_state[idx] = traj[-1][idx] - (
93 | 3.14 - state[idx] + pre_state[idx] + 3.14
94 | )
95 | # else:
96 | # tmp_state[idx] = traj[-1][idx] + (3.14-state[idx] + pre_state[idx] + 3.14)
97 |
98 | # tmp_state[idx] = traj[-1][idx] - 3.14 + state[idx]
99 | pre_state = state
100 | traj.append(tmp_state)
101 | return np.array(traj), states, valid_state, exact
102 |
103 | def remove_collision(self, geom_id, contype, conaffinity):
104 | self.planner.removeCollision(geom_id, contype, conaffinity)
105 |
106 | def get_planner_status(self):
107 | return self.planner.getPlannerStatus().decode("utf-8")
108 |
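The loop at the end of plan() turns the planner's states into a continuous command trajectory: it accumulates the delta between consecutive states and, for joints without limits, corrects jumps across the +/-pi boundary (the code uses 3.14 as pi). A standalone sketch of that correction for a single step (not part of the repository, illustrative only):

    import numpy as np

    def unwrap_step(prev_cmd, prev_state, state, non_limited_idx):
        """Next commanded joint position from two consecutive planner states,
        keeping non-limited joints continuous across the +/-pi wrap-around."""
        cmd = prev_cmd + (state - prev_state)
        for idx in non_limited_idx:
            if abs(state[idx] - prev_state[idx]) > 3.14:
                if prev_state[idx] > 0 and state[idx] <= 0:
                    # crossed +pi moving in the positive direction
                    cmd[idx] = prev_cmd[idx] + (3.14 - prev_state[idx] + state[idx] + 3.14)
                elif prev_state[idx] < 0 and state[idx] > 0:
                    # crossed -pi moving in the negative direction
                    cmd[idx] = prev_cmd[idx] - (3.14 - state[idx] + prev_state[idx] + 3.14)
        return cmd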
--------------------------------------------------------------------------------
/motion_planners/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages, Extension
2 | from Cython.Build import cythonize
3 | from distutils.sysconfig import get_python_lib
4 | import glob
5 | import os
6 | import sys
7 | import platform
8 |
9 | prefix_path = os.environ['HOME']
10 | extensions = [
11 | Extension('planner', ['planner.pyx', 'KinematicPlanner.cpp', './src/mujoco_ompl_interface.cpp', './src/mujoco_wrapper.cpp',
12 | ],
13 | include_dirs=["./include/", '/usr/local/include/eigen3', './3rd_party/include/',
14 | os.path.join(prefix_path, '.mujoco/mujoco200/include/'), '/usr/local/include/ompl'],
15 | extra_objects=['/usr/local/lib/libompl.so', os.path.join(prefix_path, '.mujoco/mujoco200/bin/libmujoco200.so')],
16 | extra_compile_args=['-std=c++11'],
17 | language="c++"),
18 | ]
19 | setup(
20 | name='mujoco-ompl',
21 | ext_modules=cythonize(extensions),
22 | )
23 |
--------------------------------------------------------------------------------
/motion_planners/setup_macos.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages, Extension
2 | from Cython.Build import cythonize
3 | from distutils.sysconfig import get_python_lib
4 | import glob
5 | import os
6 | import sys
7 | import platform
8 |
9 | # export MACOSX_DEPLOYMENT_TARGET=10.13
10 |
11 | prefix_path = os.environ['HOME']
12 | ompl_lib_path = os.path.join(prefix_path, '/usr/local/lib/libompl.dylib')  # os.path.join drops prefix_path here because the second argument is absolute, so this resolves to /usr/local/lib/libompl.dylib
13 | eigen_include_path = '/usr/local/include/eigen3'
14 |
15 | extensions = [
16 | Extension('planner', ['planner.pyx', 'KinematicPlanner.cpp', './src/mujoco_ompl_interface.cpp', './src/mujoco_wrapper.cpp',
17 | ],
18 | include_dirs=["./include/", '/opt/local/include', eigen_include_path, './3rd_party/include/', '/opt/local/include/boost/',
19 | os.path.join(prefix_path, '.mujoco/mujoco200/include/'), '/usr/local/include/ompl', '/usr/local/include'],
20 | extra_objects=[ompl_lib_path, os.path.join(prefix_path, '.mujoco/mujoco200/bin/libmujoco200.dylib')],
21 | extra_compile_args=['-std=c++11', '-stdlib=libc++'],
22 | language="c++"),
23 | ]
24 | setup(
25 | name='mujoco-ompl',
26 | ext_modules=cythonize(extensions),
27 | )
28 |
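With OMPL, Eigen, and a MuJoCo 2.0 (mujoco200) installation at the paths hard-coded above, the extension is typically built in place with "python setup.py build_ext --inplace" (or "python setup_macos.py build_ext --inplace" on macOS); this produces the planner module imported by motion_planners/sampling_based_planner.py.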
--------------------------------------------------------------------------------
/motion_planners/src/compound_state_projector.cpp:
--------------------------------------------------------------------------------
1 | #include "compound_state_projector.h"
2 |
--------------------------------------------------------------------------------
/motion_planners/src/mujoco_wrapper.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include "mujoco_wrapper.h"
3 |
4 | using namespace std;
5 |
6 |
7 | //////////////////////////////
8 | // Init static variables
9 | //////////////////////////////
10 |
11 | int MuJoCo::mj_instance_count=0;
12 | std::mutex MuJoCo::mj_instance_count_lock;
13 |
14 | //////////////////////////////
15 | // Define functions
16 | //////////////////////////////
17 |
18 | std::ostream& operator<<(std::ostream& os, const JointInfo& ji) {
19 | os << "Joint( name: \"" << ji.name << "\", "
20 | << "type: " << ji.type << ", "
21 | << "limited: " << ji.limited << ", "
22 | << "range: (" << ji.range[0] << ", " << ji.range[1] << ") "
23 | << ")";
24 | return os;
25 | }
26 |
27 | std::ostream& operator<<(std::ostream& os, const MuJoCoState& s) {
28 | os << "{time: " << s.time << ", "
29 | << "qpos: [";
30 | for(auto const& i : s.qpos) {
31 | os << i << ", ";
32 | }
33 | os << "] qvel: [";
34 | for(auto const& i : s.qvel) {
35 | os << i << ", ";
36 | }
37 | os << "] act: [";
38 | for(auto const& i : s.act) {
39 | os << i << ", ";
40 | }
41 | os << "] ctrl: [";
42 | for(auto const& i : s.ctrl) {
43 | os << i << ", ";
44 | }
45 | os << "]}";
46 | return os;
47 | }
48 |
49 | std::vector<JointInfo> getJointInfo(const mjModel* m) {
50 |     std::vector<JointInfo> joints;
51 | for (size_t i=0; i < m->njnt; i++) {
52 | JointInfo joint;
53 | joint.name = std::string(m->names + m->name_jntadr[i]);
54 | joint.type = m->jnt_type[i];
55 | joint.limited = (bool) m->jnt_limited[i];
56 | joint.range[0] = m->jnt_range[2*i];
57 | joint.range[1] = m->jnt_range[2*i + 1];
58 | joint.qposadr = m->jnt_qposadr[i];
59 | joint.dofadr = m->jnt_dofadr[i];
60 | joints.push_back(joint);
61 | }
62 | return joints;
63 | }
64 |
65 | StateRange getCtrlRange(const mjModel* m, size_t i) {
66 | StateRange r;
67 | r.limited = (bool) m->actuator_ctrllimited[i];
68 | r.range[0] = m->actuator_ctrlrange[2*i];
69 | r.range[1] = m->actuator_ctrlrange[2*i + 1];
70 | return r;
71 | }
72 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | matplotlib
3 | torch
4 | wandb
5 | requests
6 | moviepy==1.0.0
7 | imageio
8 | colorlog
9 | pyquaternion
10 | tqdm
11 | mpi4py
12 | sklearn
13 | h5py
14 | ipdb
15 | scikit-image
16 | opencv-python
17 | dm_control
18 | gym==0.15.4
19 | mujoco-py==2.0.2.5
20 | pybullet==1.9.5
21 |
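These dependencies are typically installed with "pip install -r requirements.txt". The pinned mujoco-py==2.0.2.5 expects a MuJoCo 2.0 (mujoco200) installation and license key, which matches the ~/.mujoco/mujoco200 paths hard-coded in motion_planners/setup.py.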
--------------------------------------------------------------------------------
/rl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clvrai/mopa-rl/87b7a3108bb9cf10e482cfa4c8a19dad9e7cc2d5/rl/__init__.py
--------------------------------------------------------------------------------
/rl/base_agent.py:
--------------------------------------------------------------------------------
1 | from gym import spaces
2 | import numpy as np
3 |
4 |
5 | class BaseAgent(object):
6 | def __init__(self, config, ob_space):
7 | self._config = config
8 |
9 | def normalize(self, ob):
10 | if self._config.ob_norm:
11 | return self._ob_norm.normalize(ob)
12 | return ob
13 |
14 | def act(self, ob, is_train=True, return_stds=False, random_exploration=False):
15 | if random_exploration:
16 | ac = self._ac_space.sample()
17 | for k, space in self._ac_space.spaces.items():
18 | if isinstance(space, spaces.Discrete):
19 | ac[k] = np.array([ac[k]])
20 | activation = None
21 | stds = None
22 | return ac, activation, stds
23 |
24 | if return_stds:
25 | ac, activation, stds = self._actor.act(
26 | ob, is_train=is_train, return_stds=return_stds
27 | )
28 | return ac, activation, stds
29 | else:
30 | ac, activation = self._actor.act(
31 | ob, is_train=is_train, return_stds=return_stds
32 | )
33 | return ac, activation
34 |
35 | def store_episode(self, rollouts):
36 | raise NotImplementedError()
37 |
38 | def replay_buffer(self):
39 | return self._buffer.state_dict()
40 |
41 | def load_replay_buffer(self, state_dict):
42 | self._buffer.load_state_dict(state_dict)
43 |
44 | def sync_networks(self):
45 | raise NotImplementedError()
46 |
47 | def train(self):
48 | raise NotImplementedError()
49 |
50 | def _soft_update_target_network(self, target, source, tau):
51 | for target_param, param in zip(target.parameters(), source.parameters()):
52 | target_param.data.copy_((1 - tau) * param.data + tau * target_param.data)
53 |
54 | def _copy_target_network(self, target, source):
55 | for target_param, source_param in zip(target.parameters(), source.parameters()):
56 | target_param.data.copy_(source_param.data)
57 |
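Note that _soft_update_target_network applies the interpolation weight tau to the target parameters, i.e. target <- (1 - tau) * source + tau * target, so a value such as tau = 0.995 keeps the target network close to its previous weights; this is the mirror image of the convention in which tau weights the source network.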
--------------------------------------------------------------------------------
/rl/dataset.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from time import time
3 |
4 | import numpy as np
5 |
6 |
7 | class ReplayBuffer:
8 | def __init__(self, keys, buffer_size, sample_func):
9 | self._size = buffer_size
10 |
11 | # memory management
12 | self._idx = 0
13 | self._current_size = 0
14 | self._sample_func = sample_func
15 |
16 | # create the buffer to store info
17 | self._keys = keys
18 | self._buffers = defaultdict(list)
19 |
20 | def clear(self):
21 | self._idx = 0
22 | self._current_size = 0
23 | self._buffers = defaultdict(list)
24 |
25 | # store the episode
26 | def store_episode(self, rollout):
27 | idx = self._idx = (self._idx + 1) % self._size
28 | self._current_size += 1
29 |
30 | if self._current_size > self._size:
31 | for k in self._keys:
32 | self._buffers[k][idx] = rollout[k]
33 | else:
34 | for k in self._keys:
35 | self._buffers[k].append(rollout[k])
36 |
37 | # sample the data from the replay buffer
38 | def sample(self, batch_size):
39 | # sample transitions
40 | transitions = self._sample_func(self._buffers, batch_size)
41 | return transitions
42 |
43 | def state_dict(self):
44 | return self._buffers
45 |
46 | def load_state_dict(self, state_dict):
47 | self._buffers = state_dict
48 | self._current_size = len(self._buffers["ac"])
49 |
50 |
51 | class RandomSampler:
52 | def sample_func(self, episode_batch, batch_size_in_transitions):
53 | rollout_batch_size = len(episode_batch["ac"])
54 | batch_size = batch_size_in_transitions
55 | episode_idxs = np.random.randint(0, rollout_batch_size, batch_size)
56 | t_samples = [
57 | np.random.randint(len(episode_batch["ac"][episode_idx]))
58 | for episode_idx in episode_idxs
59 | ]
60 |
61 | transitions = {}
62 | for key in episode_batch.keys():
63 | transitions[key] = [
64 | episode_batch[key][episode_idx][t]
65 | for episode_idx, t in zip(episode_idxs, t_samples)
66 | ]
67 |
68 | if "ob_next" not in episode_batch.keys():
69 | transitions["ob_next"] = [
70 | episode_batch["ob"][episode_idx][t + 1]
71 | for episode_idx, t in zip(episode_idxs, t_samples)
72 | ]
73 |
74 | new_transitions = {}
75 | for k, v in transitions.items():
76 | if isinstance(v[0], dict):
77 | sub_keys = v[0].keys()
78 | new_transitions[k] = {
79 | sub_key: np.stack([v_[sub_key] for v_ in v]) for sub_key in sub_keys
80 | }
81 | else:
82 | new_transitions[k] = np.stack(v)
83 |
84 | return new_transitions
85 |
86 |
87 | class HERSampler:
88 | def __init__(self, replay_strategy, replay_k, reward_func=None):
89 | self.replay_strategy = replay_strategy
90 | if self.replay_strategy == "future":
91 |             self.future_p = 1 - (1.0 / (1 + replay_k))  # fraction of sampled transitions whose goal is relabeled
92 | else:
93 | self.future_p = 0
94 | self.reward_func = reward_func
95 |
96 | def sample_func(self, episode_batch, batch_size_in_transitions):
97 | rollout_batch_size = len(episode_batch["ac"])
98 | batch_size = batch_size_in_transitions
99 |
100 | # select which rollouts and which timesteps to be used
101 | episode_idxs = np.random.randint(0, rollout_batch_size, batch_size)
102 | t_samples = [
103 | np.random.randint(len(episode_batch["ac"][episode_idx]))
104 | for episode_idx in episode_idxs
105 | ]
106 |
107 | transitions = {}
108 | for key in episode_batch.keys():
109 | transitions[key] = [
110 | episode_batch[key][episode_idx][t]
111 | for episode_idx, t in zip(episode_idxs, t_samples)
112 | ]
113 |
114 | transitions["ob_next"] = [
115 | episode_batch["ob"][episode_idx][t + 1]
116 | for episode_idx, t in zip(episode_idxs, t_samples)
117 | ]
118 | transitions["r"] = np.zeros((batch_size,))
119 |
120 | # hindsight experience replay
121 | for i, (episode_idx, t) in enumerate(zip(episode_idxs, t_samples)):
122 | replace_goal = np.random.uniform() < self.future_p
123 | if replace_goal:
124 | future_t = np.random.randint(
125 | t + 1, len(episode_batch["ac"][episode_idx]) + 1
126 | )
127 | future_ag = episode_batch["ag"][episode_idx][future_t]
128 | if (
129 | self.reward_func(
130 | episode_batch["ag"][episode_idx][t], future_ag, None
131 | )
132 | < 0
133 | ):
134 | transitions["g"][i] = future_ag
135 |
136 | transitions["r"][i] = self.reward_func(
137 | episode_batch["ag"][episode_idx][t + 1], transitions["g"][i], None
138 | )
139 |
140 | new_transitions = {}
141 | for k, v in transitions.items():
142 | if isinstance(v[0], dict):
143 | sub_keys = v[0].keys()
144 | new_transitions[k] = {
145 | sub_key: np.stack([v_[sub_key] for v_ in v]) for sub_key in sub_keys
146 | }
147 | else:
148 | new_transitions[k] = np.stack(v)
149 |
150 | return new_transitions
151 |
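A minimal sketch of how ReplayBuffer and RandomSampler fit together (not part of the repository); the keys, episode length, and shapes are illustrative. Each episode stores one more observation than actions so that the sampler can build "ob_next" from index t + 1:

    import numpy as np

    sampler = RandomSampler()
    buffer = ReplayBuffer(keys=["ob", "ac", "r", "done"],
                          buffer_size=100,
                          sample_func=sampler.sample_func)

    T = 5  # illustrative episode length
    rollout = {
        "ob": [np.random.randn(4) for _ in range(T + 1)],  # T + 1 observations
        "ac": [np.random.randn(2) for _ in range(T)],
        "r": [0.0] * T,
        "done": [False] * (T - 1) + [True],
    }
    buffer.store_episode(rollout)
    batch = buffer.sample(3)  # dict of stacked arrays, including "ob_next"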
--------------------------------------------------------------------------------
/rl/main.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import signal
3 | import os
4 | import json
5 | from datetime import datetime
6 |
7 | import numpy as np
8 | import torch
9 | from six.moves import shlex_quote
10 | from mpi4py import MPI
11 | from logging import CRITICAL
12 |
13 | from config import argparser
14 | from config.motion_planner import add_arguments as mp_add_arguments
15 | from rl.trainer import Trainer
16 | from util.logger import logger
17 |
18 |
19 | np.set_printoptions(precision=3)
20 | np.set_printoptions(suppress=True)
21 |
22 |
23 | def run(config):
24 | rank = MPI.COMM_WORLD.Get_rank()
25 | config.rank = rank
26 | config.is_chef = rank == 0
27 | config.seed = config.seed + rank
28 | config.num_workers = MPI.COMM_WORLD.Get_size()
29 | config.is_mpi = False if config.num_workers == 1 else True
30 |
31 | if torch.get_num_threads() != 1:
32 | fair_num_threads = max(
33 | int(torch.get_num_threads() / MPI.COMM_WORLD.Get_size()), 1
34 | )
35 | torch.set_num_threads(fair_num_threads)
36 |
37 | if config.is_chef:
38 | logger.warning("Running a base worker.")
39 | make_log_files(config)
40 | else:
41 | logger.warning("Running worker %d and disabling logger", config.rank)
42 | logger.setLevel(CRITICAL)
43 |
44 | if config.date is None:
45 | now = datetime.now()
46 | date = now.strftime("%m.%d")
47 | else:
48 | date = config.date
49 | config.run_name = "rl.{}.{}.{}.{}".format(
50 | config.env, date, config.prefix, config.seed - rank
51 | )
52 | if config.group is None:
53 | config.group = "rl.{}.{}.{}".format(config.env, date, config.prefix)
54 |
55 | config.log_dir = os.path.join(config.log_root_dir, config.run_name)
56 | if config.is_train:
57 | config.record_dir = os.path.join(config.log_dir, "video")
58 | else:
59 | config.record_dir = os.path.join(config.log_dir, "eval_video")
60 |
61 | def shutdown(signal, frame):
62 | logger.warning("Received signal %s: exiting", signal)
63 | sys.exit(128 + signal)
64 |
65 | signal.signal(signal.SIGHUP, shutdown)
66 | signal.signal(signal.SIGINT, shutdown)
67 | signal.signal(signal.SIGTERM, shutdown)
68 |
69 | # set global seed
70 | np.random.seed(config.seed)
71 | torch.manual_seed(config.seed)
72 | torch.cuda.manual_seed_all(config.seed)
73 |
74 | os.environ["DISPLAY"] = ":1"
75 |
76 | if config.gpu is not None:
77 | os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(config.gpu)
78 | assert torch.cuda.is_available()
79 | config.device = torch.device("cuda")
80 | else:
81 | config.device = torch.device("cpu")
82 |
83 | # build a trainer
84 | trainer = Trainer(config)
85 | if config.is_train:
86 | trainer.train()
87 | logger.info("Finish training")
88 | else:
89 | trainer.evaluate()
90 | logger.info("Finish evaluating")
91 |
92 |
93 | def make_log_files(config):
94 | if config.date is None:
95 | now = datetime.now()
96 | date = now.strftime("%m.%d")
97 | else:
98 | date = config.date
99 | # date = '07.25'
100 | config.run_name = "rl.{}.{}.{}.{}".format(
101 | config.env, date, config.prefix, config.seed
102 | )
103 | if config.group is None:
104 | config.group = "rl.{}.{}.{}".format(config.env, date, config.prefix)
105 |
106 | config.log_dir = os.path.join(config.log_root_dir, config.run_name)
107 | logger.info("Create log directory: %s", config.log_dir)
108 | os.makedirs(config.log_dir, exist_ok=True)
109 |
110 | if config.is_train:
111 | config.record_dir = os.path.join(config.log_dir, "video")
112 | else:
113 | config.record_dir = os.path.join(config.log_dir, "eval_video")
114 | logger.info("Create video directory: %s", config.record_dir)
115 | os.makedirs(config.record_dir, exist_ok=True)
116 |
117 | if config.is_train:
118 | # log git diff
119 | cmds = [
120 | "echo `git rev-parse HEAD` >> {}/git.txt".format(config.log_dir),
121 | "git diff >> {}/git.txt".format(config.log_dir),
122 | "echo 'python -m rl.main {}' >> {}/cmd.sh".format(
123 | " ".join([shlex_quote(arg) for arg in sys.argv[1:]]), config.log_dir
124 | ),
125 | ]
126 | os.system("\n".join(cmds))
127 |
128 | # log config
129 | param_path = os.path.join(config.log_dir, "params.json")
130 | logger.info("Store parameters in %s", param_path)
131 | with open(param_path, "w") as fp:
132 | json.dump(config.__dict__, fp, indent=4, sort_keys=True)
133 |
134 |
135 | if __name__ == "__main__":
136 | parser = argparser()
137 | args, unparsed = parser.parse_known_args()
138 |
139 | if "Pusher" in args.env:
140 | from config.pusher import add_arguments
141 | elif "Sawyer" in args.env:
142 | from config.sawyer import add_arguments
143 | else:
144 | raise ValueError("args.env (%s) is not supported" % args.env)
145 |
146 | add_arguments(parser)
147 | mp_add_arguments(parser)
148 | args, unparsed = parser.parse_known_args()
149 |
150 | if args.debug:
151 | args.rollout_length = 150
152 | args.start_steps = 100
153 |
154 | if len(unparsed):
155 | logger.error("Unparsed argument is detected:\n%s", unparsed)
156 | else:
157 | run(args)
158 |
--------------------------------------------------------------------------------
/rl/planner_agent.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from util.gym import action_size
4 | from util.logger import logger
5 | from motion_planners.sampling_based_planner import SamplingBasedPlanner
6 |
7 |
8 | class PlannerAgent:
9 | def __init__(
10 | self,
11 | config,
12 | ac_space,
13 | non_limited_idx=None,
14 | passive_joint_idx=[],
15 | ignored_contacts=[],
16 | planner_type=None,
17 | goal_bias=0.05,
18 | is_simplified=False,
19 | simplified_duration=0.1,
20 | range_=None,
21 | ):
22 |
23 | self._config = config
24 | self.planner = SamplingBasedPlanner(
25 | config,
26 | config._xml_path,
27 | action_size(ac_space),
28 | non_limited_idx,
29 | planner_type=planner_type,
30 | passive_joint_idx=passive_joint_idx,
31 | ignored_contacts=ignored_contacts,
32 | contact_threshold=config.contact_threshold,
33 | goal_bias=goal_bias,
34 | is_simplified=is_simplified,
35 | simplified_duration=simplified_duration,
36 | range_=range_,
37 | )
38 |
39 | self._is_simplified = is_simplified
40 | self._simplified_duration = simplified_duration
41 |
42 | def plan(self, start, goal, timelimit=None, attempts=15):
43 | config = self._config
44 | if timelimit is None:
45 | timelimit = config.timelimit
46 | traj, states, valid, exact = self.planner.plan(start, goal, timelimit)
47 | success = valid and exact
48 |
49 | if success:
50 | return traj[1:], success, valid, exact
51 | else:
52 | return traj, success, valid, exact
53 |
54 | def get_planner_status(self):
55 | return self.planner.get_planner_status()
56 |
57 | def isValidState(self, state):
58 | return self.planner.isValidState(state)
59 |
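A construction sketch (not part of the repository), assuming the Cython planner extension has been built. The config attributes listed are exactly the ones read here and in SamplingBasedPlanner; the values, the XML path, and the Dict action space are illustrative, and the planner/objective names are assumptions:

    from types import SimpleNamespace
    from gym import spaces

    config = SimpleNamespace(
        _xml_path="env/assets/xml/pusher_obstacle.xml",  # illustrative MJCF path
        planner_type="rrt_connect",       # assumed planner name
        planner_objective="path_length",  # assumed objective name
        threshold=0.1,
        range=1.0,
        contact_threshold=0.0,
        timelimit=1.0,
        seed=1234,
    )
    ac_space = spaces.Dict({"default": spaces.Box(low=-1.0, high=1.0, shape=(7,))})
    agent = PlannerAgent(config, ac_space, non_limited_idx=None)
    # traj, success, valid, exact = agent.plan(start_qpos, goal_qpos)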
--------------------------------------------------------------------------------
/rl/policies/__init__.py:
--------------------------------------------------------------------------------
1 | from .mlp_actor_critic import MlpActor, MlpCritic
2 |
3 |
4 | def get_actor_critic_by_name(name):
5 | if name == "mlp":
6 | return MlpActor, MlpCritic
7 | else:
8 | raise NotImplementedError()
9 |
--------------------------------------------------------------------------------
/rl/policies/distributions.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import torch.distributions
8 |
9 |
10 | # Categorical
11 | FixedCategorical = torch.distributions.Categorical
12 |
13 | old_sample = FixedCategorical.sample
14 | FixedCategorical.sample = lambda self: old_sample(self).unsqueeze(-1)
15 |
16 | log_prob_cat = FixedCategorical.log_prob
17 | # FixedCategorical.log_probs = lambda self, actions: log_prob_cat(self, actions.squeeze(-1)).view(actions.size(0), -1).sum(-1).unsqueeze(-1)
18 | FixedCategorical.log_probs = lambda self, actions: log_prob_cat(
19 | self, actions.squeeze(-1)
20 | ).unsqueeze(-1)
21 |
22 | categorical_entropy = FixedCategorical.entropy
23 | FixedCategorical.entropy = lambda self: categorical_entropy(self) * 10.0 # scaling
24 |
25 | FixedCategorical.mode = lambda self: self.probs.argmax(dim=-1, keepdim=True)
26 |
27 | # Normal
28 | FixedNormal = torch.distributions.Normal
29 |
30 | normal_init = FixedNormal.__init__
31 | FixedNormal.__init__ = lambda self, mean, std: normal_init(
32 | self, mean.double(), std.double()
33 | )
34 |
35 | log_prob_normal = FixedNormal.log_prob
36 | FixedNormal.log_probs = (
37 | lambda self, actions: log_prob_normal(self, actions.double())
38 | .sum(-1, keepdim=True)
39 | .float()
40 | )
41 |
42 | normal_entropy = FixedNormal.entropy
43 | FixedNormal.entropy = lambda self: normal_entropy(self).sum(-1).float()
44 |
45 | FixedNormal.mode = lambda self: self.mean.float()
46 |
47 | normal_sample = FixedNormal.sample
48 | FixedNormal.sample = lambda self: normal_sample(self).float()
49 |
50 | normal_rsample = FixedNormal.rsample
51 | FixedNormal.rsample = lambda self: normal_rsample(self).float()
52 |
53 |
54 | def init(module, weight_init, bias_init, gain=1):
55 | weight_init(module.weight.data, gain=gain)
56 | bias_init(module.bias.data)
57 | return module
58 |
59 |
60 | class AddBias(nn.Module):
61 | def __init__(self, bias):
62 | super().__init__()
63 | self._bias = nn.Parameter(bias.unsqueeze(1))
64 |
65 | def forward(self, x):
66 | if x.dim() == 2:
67 | bias = self._bias.t().view(1, -1)
68 | else:
69 | bias = self._bias.t().view(1, -1, 1, 1)
70 | return x + bias
71 |
72 |
73 | class Categorical(nn.Module):
74 | def __init__(self):
75 | super().__init__()
76 |
77 | def forward(self, x):
78 | return FixedCategorical(logits=x)
79 |
80 |
81 | class DiagGaussian(nn.Module):
82 | def __init__(self, config):
83 | super().__init__()
84 | self.logstd = AddBias(torch.zeros(config.action_size))
85 | self.config = config
86 |
87 | def forward(self, x):
88 | zeros = torch.zeros(x.size()).to(self.config.device)
89 | logstd = self.logstd(zeros)
90 | return FixedNormal(x, logstd.exp())
91 |
92 |
93 | class MixedDistribution(nn.Module):
94 | def __init__(self, distributions):
95 | super().__init__()
96 | assert isinstance(distributions, OrderedDict)
97 | self.distributions = distributions
98 |
99 | def mode(self):
100 | return OrderedDict([(k, dist.mode()) for k, dist in self.distributions.items()])
101 |
102 | def sample(self):
103 | return OrderedDict(
104 | [(k, dist.sample()) for k, dist in self.distributions.items()]
105 | )
106 |
107 | def rsample(self):
108 | return OrderedDict(
109 | [(k, dist.rsample()) for k, dist in self.distributions.items()]
110 | )
111 |
112 | def log_probs(self, x):
113 | assert isinstance(x, dict)
114 | return OrderedDict(
115 | [(k, dist.log_probs(x[k])) for k, dist in self.distributions.items()]
116 | )
117 |
118 | def entropy(self):
119 | return sum([dist.entropy() for dist in self.distributions.values()])
120 |
121 |
122 | class GumbelSoftmax(torch.distributions.RelaxedOneHotCategorical):
123 | """
124 | A differentiable Categorical distribution using reparametrization trick with Gumbel-Softmax
125 | Explanation http://amid.fish/assets/gumbel.html
126 |     NOTE: use this in place of PyTorch's RelaxedOneHotCategorical distribution since its log_prob is not working right (returns positive values)
127 | Papers:
128 | [1] The Concrete Distribution: A Continuous Relaxation of Discrete Random Variables (Maddison et al, 2017)
129 | [2] Categorical Reparametrization with Gumbel-Softmax (Jang et al, 2017)
130 | """
131 |
132 | def sample(self, sample_shape=torch.Size()):
133 | """Gumbel-softmax sampling. Note rsample is inherited from RelaxedOneHotCategorical"""
134 | u = torch.empty(
135 | self.logits.size(), device=self.logits.device, dtype=self.logits.dtype
136 | ).uniform_(0, 1)
137 | noisy_logits = self.logits - torch.log(-torch.log(u))
138 | return torch.argmax(noisy_logits, dim=-1)
139 |
140 | def rsample(self, sample_shape=torch.Size()):
141 | """
142 | Gumbel-softmax resampling using the Straight-Through trick.
143 | Credit to Ian Temple for bringing this to our attention. To see standalone code of how this works, refer to https://gist.github.com/yzh119/fd2146d2aeb329d067568a493b20172f
144 | """
145 | rout = super().rsample(sample_shape) # differentiable
146 | out = F.one_hot(torch.argmax(rout, dim=-1), self.logits.shape[-1]).float()
147 | return (out - rout).detach() + rout
148 |
149 | def log_prob(self, value):
150 | """value is one-hot or relaxed"""
151 | # if self.logits.shape[-1] == 1:
152 | # value = torch.zeros_like(value)
153 | if value.shape != self.logits.shape:
154 | value = F.one_hot(value.long(), self.logits.shape[-1]).float()
155 | assert value.shape == self.logits.shape
156 | return -torch.sum(-value * F.log_softmax(self.logits, -1), -1) # scaling
157 |
158 | def entropy(self):
159 | return self.base_dist._categorical.entropy()
160 |
161 |
162 | FixedGumbelSoftmax = GumbelSoftmax
163 | old_sample_gumbel = FixedGumbelSoftmax.sample
164 | FixedGumbelSoftmax.sample = lambda self: old_sample_gumbel(self).unsqueeze(-1)
165 | log_prob_gumbel = FixedGumbelSoftmax.log_prob
166 | FixedGumbelSoftmax.log_probs = lambda self, actions: log_prob_gumbel(
167 | self, actions.squeeze(-1)
168 | ).unsqueeze(-1)
169 | gumbel_entropy = FixedGumbelSoftmax.entropy
170 | FixedGumbelSoftmax.entropy = lambda self: gumbel_entropy(self)
171 | FixedGumbelSoftmax.mode = lambda self: self.probs.argmax(dim=-1, keepdim=True)
172 | gumbel_rsample = FixedGumbelSoftmax.rsample
173 | FixedGumbelSoftmax.rsample = lambda self: gumbel_rsample(self).float()
174 |
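A small sketch of the monkey-patched FixedNormal interface defined above (not part of the repository), showing the shapes the actor-critic code relies on:

    import torch

    dist = FixedNormal(torch.zeros(2, 3), torch.ones(2, 3))  # batch of 2, action dim 3
    ac = dist.rsample()        # reparameterized sample, float32, shape (2, 3)
    logp = dist.log_probs(ac)  # log-prob summed over the action dim, shape (2, 1)
    ent = dist.entropy()       # entropy summed over the action dim, shape (2,)
    mode = dist.mode()         # the mean, shape (2, 3)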
--------------------------------------------------------------------------------
/rl/policies/mlp_actor_critic.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import torch
4 | import torch.nn as nn
5 | from gym import spaces
6 |
7 | from rl.policies.utils import CNN, MLP
8 | from rl.policies.actor_critic import Actor, Critic
9 | from util.gym import observation_size, action_size
10 |
11 |
12 | class MlpActor(Actor):
13 | def __init__(
14 | self,
15 | config,
16 | ob_space,
17 | ac_space,
18 | tanh_policy,
19 | deterministic=False,
20 | activation="relu",
21 | rl_hid_size=None,
22 | ):
23 | super().__init__(config, ob_space, ac_space, tanh_policy)
24 |
25 | self._ac_space = ac_space
26 | self._deterministic = deterministic
27 |         if rl_hid_size is None:
28 | rl_hid_size = config.rl_hid_size
29 |
30 | # observation
31 | input_dim = observation_size(ob_space)
32 |
33 | self.fc = MLP(
34 | config,
35 | input_dim,
36 | rl_hid_size,
37 | [rl_hid_size] * config.actor_num_hid_layers,
38 | activation=activation,
39 | )
40 | self.fc_means = nn.ModuleDict()
41 | self.fc_log_stds = nn.ModuleDict()
42 |
43 | for k, space in ac_space.spaces.items():
44 | if isinstance(space, spaces.Box):
45 | self.fc_means.update(
46 | {
47 | k: MLP(
48 | config,
49 | rl_hid_size,
50 | action_size(space),
51 | activation=activation,
52 | )
53 | }
54 | )
55 | if not self._deterministic:
56 | self.fc_log_stds.update(
57 | {
58 | k: MLP(
59 | config,
60 | rl_hid_size,
61 | action_size(space),
62 | activation=activation,
63 | )
64 | }
65 | )
66 | elif isinstance(space, spaces.Discrete):
67 | self.fc_means.update(
68 | {k: MLP(config, rl_hid_size, space.n, activation=activation)}
69 | )
70 | else:
71 | self.fc_means.update(
72 | {k: MLP(config, rl_hid_size, space, activation=activation)}
73 | )
74 |
75 | def forward(self, ob, deterministic=False):
76 | inp = list(ob.values())
77 | if len(inp[0].shape) == 1:
78 | inp = [x.unsqueeze(0) for x in inp]
79 |
80 | out = self._activation_fn(self.fc(torch.cat(inp, dim=-1)))
81 | out = torch.reshape(out, (out.shape[0], -1))
82 |
83 | means, stds = OrderedDict(), OrderedDict()
84 |
85 | for k, space in self._ac_space.spaces.items():
86 | mean = self.fc_means[k](out)
87 | if isinstance(space, spaces.Box) and not self._deterministic:
88 | if self._config.algo == "ppo":
89 | zeros = torch.zeros(mean.size()).to(self._config.device)
90 | log_std = self.fc_log_stds[k](zeros)
91 | else:
92 | log_std = self.fc_log_stds[k](out)
93 | log_std = torch.clamp(log_std, -10, 2)
94 | std = torch.exp(log_std.double())
95 | else:
96 | std = None
97 | means[k] = mean
98 | stds[k] = std
99 | return means, stds
100 |
101 |
102 | class MlpCritic(Critic):
103 | def __init__(
104 | self, config, ob_space, ac_space=None, activation="relu", rl_hid_size=None
105 | ):
106 | super().__init__(config)
107 |
108 | input_dim = observation_size(ob_space)
109 | if ac_space is not None:
110 | input_dim += action_size(ac_space)
111 |
112 |         if rl_hid_size is None:
113 | rl_hid_size = config.rl_hid_size
114 |
115 | self.fc = MLP(config, input_dim, 1, [rl_hid_size] * 2, activation=activation)
116 |
117 | def forward(self, ob, ac=None):
118 | inp = list(ob.values())
119 | if len(inp[0].shape) == 1:
120 | inp = [x.unsqueeze(0) for x in inp]
121 | if ac is not None:
122 | ac = list(ac.values())
123 | if len(ac[0].shape) == 1:
124 | ac = [x.unsqueeze(0) for x in ac]
125 | inp.extend(ac)
126 |
127 | out = self.fc(torch.cat(inp, dim=-1))
128 | out = torch.reshape(out, (out.shape[0], 1))
129 |
130 | return out
131 |
132 |
133 | # Necessary for my KFAC implementation.
134 | class AddBias(nn.Module):
135 | def __init__(self, bias):
136 | super(AddBias, self).__init__()
137 | self._bias = nn.Parameter(bias.unsqueeze(1))
138 |
139 | def forward(self, x):
140 | if x.dim() == 2:
141 | bias = self._bias.t().view(1, -1)
142 | else:
143 | bias = self._bias.t().view(1, -1, 1, 1)
144 |
145 | return x + bias
146 |
--------------------------------------------------------------------------------
/rl/policies/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 |
7 | class CNN(nn.Module):
8 | def __init__(self, config, input_dim):
9 | super().__init__()
10 | self.activation_fn = nn.ReLU()
11 |
12 | self.convs = nn.ModuleList()
13 | w = config.img_width
14 | init_ = lambda m: init(
15 | m,
16 | nn.init.orthogonal_,
17 | lambda x: nn.init.constant_(x, 0),
18 | nn.init.calculate_gain("relu"),
19 | )
20 |
21 | for k, s, d in zip(config.kernel_size, config.stride, config.conv_dim):
22 | self.convs.append(init_(nn.Conv2d(input_dim, d, int(k), int(s))))
23 | w = int(np.floor((w - (int(k) - 1) - 1) / int(s) + 1))
24 | input_dim = d
25 |
26 | # screen_width == 32 (8,4)-(3,2) -> 3x3
27 | # screen_width == 64 (8,4)-(3,2)-(3,2) -> 3x3
28 | # screen_width == 128 (8,4)-(3,2)-(3,2)-(3,2) -> 3x3
29 | # screen_width == 256 (8,4)-(3,2)-(3,2)-(3,2) -> 7x7
30 |
31 | print("Output of CNN = %d x %d x %d" % (w, w, d))
32 | self.w = w
33 | self.output_size = w * w * d
34 |
35 | def forward(self, ob):
36 | out = ob
37 | for conv in self.convs:
38 | out = self.activation_fn(conv(out))
39 | out = out.flatten(start_dim=1)
40 | return out
41 |
42 |
43 | def fanin_init(tensor):
44 | size = tensor.size()
45 | if len(size) == 2:
46 | fan_in = size[0]
47 | elif len(size) > 2:
48 | fan_in = np.prod(size[1:])
49 | else:
50 | raise Exception("Shape must be have dimension at least 2.")
51 | bound = 1.0 / np.sqrt(fan_in)
52 | return tensor.data.uniform_(-bound, bound)
53 |
54 |
55 | class MLP(nn.Module):
56 | def __init__(
57 | self,
58 | config,
59 | input_dim,
60 | output_dim,
61 | hid_dims=[],
62 | last_activation=False,
63 | activation="relu",
64 | ):
65 | super().__init__()
66 | if activation == "relu":
67 | activation_fn = nn.ReLU()
68 | elif activation == "tanh":
69 | activation_fn = nn.Tanh()
70 |         elif activation == "elu":
71 |             activation_fn = nn.ELU()
72 |         else:
73 |             raise NotImplementedError
74 |
75 | init_ = lambda m: init(
76 | m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0), np.sqrt(2)
77 | )
78 |
79 | fc = []
80 | prev_dim = input_dim
81 | for d in hid_dims:
82 | fc.append(nn.Linear(prev_dim, d))
83 | fanin_init(fc[-1].weight)
84 | fc[-1].bias.data.fill_(0.1)
85 | fc.append(activation_fn)
86 | prev_dim = d
87 | fc.append(nn.Linear(prev_dim, output_dim))
88 | fc[-1].weight.data.uniform_(-1e-3, 1e-3)
89 | fc[-1].bias.data.uniform_(-1e-3, 1e-3)
90 | if last_activation:
91 | fc.append(activation_fn)
92 | self.fc = nn.Sequential(*fc)
93 |
94 | def forward(self, ob):
95 | return self.fc(ob)
96 |
97 |
98 | def init(module, weight_init, bias_init, gain=1):
99 | weight_init(module.weight.data, gain=gain)
100 | bias_init(module.bias.data)
101 | return module
102 |
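A minimal sketch of the MLP block (not part of the repository). The constructor accepts a config object but does not read it in the code above, so None is passed purely for illustration:

    import torch

    mlp = MLP(None, input_dim=10, output_dim=2, hid_dims=[64, 64])
    out = mlp(torch.randn(5, 10))  # shape (5, 2)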
--------------------------------------------------------------------------------
/scripts/2d/baseline.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -x
2 | gpu=$1
3 | seed=$2
4 | algo='sac'
5 | prefix="BASELINE.SAC"
6 | env="PusherObstacle-v0"
7 | max_episode_step="400"
8 | debug="True"
9 | log_root_dir="./logs"
10 | reward_scale='10.'
11 | vis_replay="True"
12 | success_reward='150.'
13 |
14 | python -m rl.main \
15 | --log_root_dir $log_root_dir \
16 | --prefix $prefix \
17 | --env $env \
18 | --gpu $gpu \
19 | --max_episode_step $max_episode_step \
20 | --debug $debug \
21 | --algo $algo \
22 | --seed $seed \
23 | --reward_scale $reward_scale \
24 | --vis_replay $vis_replay \
25 | --success_reward $success_reward \
26 |
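These launch scripts take the GPU id and the random seed as their two positional arguments, e.g. "bash scripts/2d/baseline.sh 0 1234".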
--------------------------------------------------------------------------------
/scripts/2d/baseline_ik.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -x
2 | gpu=$1
3 | seed=$2
4 | algo='sac'
5 | prefix="BASELINE.SAC.IK"
6 | env="PusherObstacle-v0"
7 | gpu=$gpu
8 | max_episode_step="400"
9 | debug="False"
10 | log_root_dir="./logs"
11 | reward_scale='10.'
12 | vis_replay="True"
13 | success_reward='150.'
14 | use_ik_target="True"
15 | action_range="0.01"
16 |
17 | python -m rl.main \
18 | --log_root_dir $log_root_dir \
19 | --prefix $prefix \
20 | --env $env \
21 | --gpu $gpu \
22 | --max_episode_step $max_episode_step \
23 | --debug $debug \
24 | --algo $algo \
25 | --seed $seed \
26 | --reward_scale $reward_scale \
27 | --vis_replay $vis_replay \
28 | --success_reward $success_reward \
29 | --use_ik_target $use_ik_target \
31 | --action_range $action_range
32 |
--------------------------------------------------------------------------------
/scripts/2d/baseline_lg.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -x
2 | gpu=$1
3 | seed=$2
4 | algo='sac'
5 | prefix="BASELINE.SAC.LG"
6 | env="PusherObstacle-v0"
7 | max_episode_step="400"
8 | debug="False"
9 | log_root_dir="./logs"
10 | reward_scale='10.'
11 | vis_replay="True"
12 | success_reward='150.'
13 | expand_ac_space="True"
14 | action_range='1.0'
15 | use_smdp_update="True"
16 |
17 | python -m rl.main \
18 | --log_root_dir $log_root_dir \
19 | --prefix $prefix \
20 | --env $env \
21 | --gpu $gpu \
22 | --max_episode_step $max_episode_step \
23 | --debug $debug \
24 | --algo $algo \
25 | --seed $seed \
26 | --reward_scale $reward_scale \
27 | --vis_replay $vis_replay \
28 | --success_reward $success_reward \
29 | --expand_ac_space $expand_ac_space \
30 | --action_range $action_range \
31 | --use_smdp_update $use_smdp_update
32 |
--------------------------------------------------------------------------------
/scripts/2d/mopa.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -x
2 | gpu=$1
3 | seed=$2
4 |
5 | algo='sac'
6 | prefix="MoPA-SAC"
7 | env="PusherObstacle-v0"
8 | max_episode_step="400"
9 | debug="False"
10 | log_root_dir="./logs"
11 | mopa="True"
12 | reward_scale="0.2"
13 | reuse_data="True"
14 | action_range="1.0"
15 | omega='0.5'
16 | use_smdp_update="True"
17 | stochastic_eval="True"
18 | invalid_target_handling="True"
19 | max_reuse_data='30'
20 | ac_space_type="piecewise"
21 | success_reward="150.0"
22 |
23 |
24 | python -m rl.main \
25 | --log_root_dir $log_root_dir \
26 | --prefix $prefix \
27 | --env $env \
28 | --gpu $gpu \
29 | --max_episode_step $max_episode_step \
30 | --debug $debug \
31 | --algo $algo \
32 | --seed $seed \
33 | --mopa $mopa \
34 | --reward_scale $reward_scale \
35 | --reuse_data $reuse_data \
36 | --action_range $action_range \
37 | --omega $omega \
38 | --success_reward $success_reward \
39 | --stochastic_eval $stochastic_eval \
40 | --invalid_target_handling $invalid_target_handling \
41 | --max_reuse_data $max_reuse_data \
42 | --ac_space_type $ac_space_type \
43 | --use_smdp_update $use_smdp_update \
44 |
--------------------------------------------------------------------------------
/scripts/2d/mopa_discrete.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -x
2 | gpu=$1
3 | seed=$2
4 | algo='sac'
5 | prefix="MoPA-SAC.discrete"
6 | env="PusherObstacle-v0"
7 | max_episode_step="400"
8 | debug="False"
9 | reward_type='sparse'
10 | log_root_dir="./logs"
11 | mopa="True"
12 | reward_scale="0.2"
13 | reuse_data="True"
14 | action_range="1.0"
15 | stochastic_eval="True"
16 | invalid_target_handling="True"
17 | max_reuse_data='30'
18 | use_smdp_update="True"
19 | ac_space_type="normal"
20 | success_reward="150.0"
21 | discrete_action="True"
22 |
23 |
24 | #mpiexec -n $workers
25 | python -m rl.main \
26 | --log_root_dir $log_root_dir \
27 | --prefix $prefix \
28 | --env $env \
29 | --gpu $gpu \
30 | --max_episode_step $max_episode_step \
31 | --debug $debug \
32 | --algo $algo \
33 | --seed $seed \
34 | --reward_type $reward_type \
36 | --mopa $mopa \
37 | --reward_scale $reward_scale \
38 | --reuse_data $reuse_data \
39 | --action_range $action_range \
40 | --discrete_action $discrete_action \
41 | --success_reward $success_reward \
42 | --stochastic_eval $stochastic_eval \
43 | --invalid_target_handling $invalid_target_handling \
44 | --max_reuse_data $max_reuse_data \
45 | --use_smdp_update $use_smdp_update \
46 | --ac_space_type $ac_space_type \
47 |
--------------------------------------------------------------------------------
/scripts/2d/mopa_ik.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -x
2 | gpu=$1
3 | seed=$2
4 |
5 | algo='sac'
6 | prefix="MoPA-SAC.IK"
7 | env="PusherObstacle-v0"
8 | max_episode_step="400"
9 | debug="False"
10 | reward_type='sparse'
11 | log_root_dir="./logs"
12 | mopa="True"
13 | reward_scale="0.2"
14 | reuse_data="True"
15 | action_range="0.1"
16 | stochastic_eval="True"
17 | invalid_target_handling="True"
18 | max_reuse_data='30'
19 | use_smdp_update="True"
20 | success_reward="150.0"
21 | use_ik_target="True"
22 | ik_target="fingertip"
23 | omega='0.1'
24 |
25 | python -m rl.main \
26 | --log_root_dir $log_root_dir \
27 | --prefix $prefix \
28 | --env $env \
29 | --gpu $gpu \
30 | --max_episode_step $max_episode_step \
31 | --debug $debug \
32 | --algo $algo \
33 | --seed $seed \
34 | --reward_type $reward_type \
35 | --mopa $mopa \
36 | --reward_scale $reward_scale \
37 | --reuse_data $reuse_data \
38 | --action_range $action_range \
39 | --success_reward $success_reward \
40 | --stochastic_eval $stochastic_eval \
41 | --invalid_target_handling $invalid_target_handling \
42 | --max_reuse_data $max_reuse_data \
43 | --use_smdp_update $use_smdp_update \
44 | --use_ik_target $use_ik_target \
45 | --ik_target $ik_target \
46 | --omega $omega
47 |
--------------------------------------------------------------------------------
/scripts/3d/assembly/baseline.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -x
2 | gpu=$1
3 | seed=$2
4 | prefix="BASELINE"
5 | env="SawyerAssemblyObstacle-v0"
6 | algo='sac'
7 | max_episode_step="250"
8 | debug="False"
9 | reward_type='sparse'
10 | log_root_dir="./logs"
11 | vis_replay="True"
12 | plot_type='3d'
13 | success_reward='150.'
14 | reward_scale="10."
15 |
16 | python -m rl.main \
17 | --log_root_dir $log_root_dir \
18 | --prefix $prefix \
19 | --env $env \
20 | --gpu $gpu \
21 | --max_episode_step $max_episode_step \
22 | --debug $debug \
23 | --algo $algo \
24 | --seed $seed \
25 | --reward_type $reward_type \
26 | --vis_replay $vis_replay \
27 | --plot_type $plot_type \
28 | --success_reward $success_reward \
29 | --reward_scale $reward_scale
30 |
--------------------------------------------------------------------------------
/scripts/3d/assembly/baseline_ik.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -x
2 | gpu=$1
3 | seed=$2
4 | prefix="BASELINE.IK"
5 | env="SawyerAssemblyObstacle-v0"
6 | algo='sac'
7 | max_episode_step="250"
8 | debug="False"
9 | reward_type='sparse'
10 | log_root_dir="./logs"
11 | vis_replay="True"
12 | plot_type='3d'
13 | success_reward='150.'
14 | reward_scale="10."
15 | use_ik_target="True"
16 | ik_target="grip_site"
17 | action_range="0.001"
18 |
19 | python -m rl.main \
20 | --log_root_dir $log_root_dir \
21 | --prefix $prefix \
22 | --env $env \
23 | --gpu $gpu \
24 | --max_episode_step $max_episode_step \
25 | --debug $debug \
26 | --algo $algo \
27 | --seed $seed \
28 | --reward_type $reward_type \
29 | --vis_replay $vis_replay \
30 | --plot_type $plot_type \
31 | --success_reward $success_reward \
32 | --reward_scale $reward_scale \
33 | --use_ik_target $use_ik_target \
34 | --ik_target $ik_target \
35 | --action_range $action_range
36 |
--------------------------------------------------------------------------------
/scripts/3d/assembly/baseline_lg.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -x
2 | gpu=$1
3 | seed=$2
4 | prefix="BASELINE.SAC.LG"
5 | env="SawyerAssemblyObstacle-v0"
6 | data='08.09'
7 | algo='sac'
8 | max_episode_step="250"
9 | debug="False"
10 | reward_type='sparse'
11 | log_root_dir="./logs"
12 | vis_replay="True"
13 | plot_type='3d'
14 | success_reward='150.'
15 | reward_scale="10."
16 | action_range="0.5"
17 | expand_ac_space='True'
18 | use_smdp_update="True"
19 |
20 | python -m rl.main \
21 | --log_root_dir $log_root_dir \
22 | --prefix $prefix \
23 | --env $env \
24 | --gpu $gpu \
25 | --max_episode_step $max_episode_step \
26 | --debug $debug \
27 | --algo $algo \
28 | --seed $seed \
29 | --reward_type $reward_type \
30 | --vis_replay $vis_replay \
31 | --plot_type $plot_type \
32 | --success_reward $success_reward \
33 | --reward_scale $reward_scale \
34 | --action_range $action_range \
35 | --expand_ac_space $expand_ac_space \
36 | --use_smdp_update $use_smdp_update
37 |
--------------------------------------------------------------------------------
/scripts/3d/assembly/mopa.sh:
--------------------------------------------------------------------------------
126 | theta = 2 * np.arcsin(sin_theta)
127 | theta *= 1 if quat[0] >= 0 else -1
128 | axis = quat[1:] / sin_theta
129 |
130 | return axis, theta
131 |
132 |
133 | def quat_to_zangle(quat):
134 | q = quat_mul(quat_inv(quat_create(np.array([0, 1.0, 0]), np.pi / 2)), quat)
135 | ax, angle = quat2axisangle(q)
136 | return angle
137 |
138 |
139 | def zangle_to_quat(zangle):
140 | """
141 | :param zangle: angle about the z-axis, in radians
142 | :return: quaternion
143 | """
144 | return quat_mul(
145 | quat_create(np.array([0, 1.0, 0]), np.pi / 2),
146 | quat_create(np.array([-1.0, 0, 0]), zangle),
147 | )
148 |
149 |
150 | def quat_create(axis, angle):
151 | """
152 | Create a quaternion from an axis and angle.
153 | :param axis: The three-dimensional rotation axis
154 | :param angle: The angle in radians
155 | :return: A 4-d array containing the components of a quaternion.
156 | """
157 | quat = np.zeros([4], dtype="float")
158 | mujoco_py.functions.mju_axisAngle2Quat(quat, axis, angle)
159 | return quat
160 |
161 |
162 | def quat_inv(quat):
163 | """
164 | Invert a quaternion, represented by a 4d array.
165 | :param quat: A quaternion (4-d array). Must not be the zero quaternion (all elements equal to zero).
166 | :return: A 4-d array containing the components of a quaternion.
167 | """
168 | d = 1.0 / np.sum(quat ** 2)
169 | return d * np.array([1.0, -1.0, -1.0, -1.0]) * quat
170 |
171 |
172 | def quat_mul(quat1, quat2):
173 | """
174 | Multiply two quaternions, both represented as 4-d arrays.
175 | """
176 | prod_quat = np.zeros([4], dtype="float")
177 | mujoco_py.functions.mju_mulQuat(prod_quat, quat1, quat2)
178 | return prod_quat
179 |
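The quaternion helpers above wrap mujoco_py's C routines, so all quaternions use MuJoCo's w-first component order (w, x, y, z). As a minimal usage sketch (not part of the repository source; the 45-degree test angle and variable names are illustrative only), a planar angle can be round-tripped through zangle_to_quat and quat_to_zangle, assuming the functions defined above are in scope:

    import numpy as np

    zangle = np.pi / 4                    # arbitrary test angle (45 degrees)
    q = zangle_to_quat(zangle)            # 4-d quaternion, w-first
    recovered = quat_to_zangle(q)         # inverse of the construction above
    # Round-trip equality holds for angles in (-pi, pi].
    assert np.isclose(recovered, zangle)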
--------------------------------------------------------------------------------