├── .coveragerc ├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── environment.yml ├── examples ├── rl │ ├── restore_training.py │ ├── run_3link_nn.py │ ├── run_3link_residual_rmp.py │ ├── run_franka_nn.py │ ├── run_franka_residual_rmp.py │ └── run_policy_rollouts.py └── rmp2 │ ├── rmp2_3link.py │ └── rmp2_franka.py ├── rmp2 ├── configs │ ├── 3link_config.yaml │ ├── 3link_residual_config.yaml │ ├── 3link_residual_rmp_config.yaml │ ├── franka_config.yaml │ ├── franka_residual_config.yaml │ └── franka_residual_rmp_config.yaml ├── envs │ ├── __init__.py │ ├── franka_env.py │ ├── franka_residual_env.py │ ├── franka_residual_rmp_env.py │ ├── robot_env.py │ ├── robot_sim.py │ ├── three_link_env.py │ ├── three_link_residual_env.py │ └── three_link_residual_rmp_env.py ├── kinematics │ ├── __init__.py │ └── tf_fk.py ├── policies │ ├── gaussian_policy.py │ └── policy_networks.py ├── rmpgraph │ ├── __init__.py │ ├── rmpgraph.py │ ├── rmps │ │ ├── __init__.py │ │ └── rmps.py │ ├── robotics.py │ └── taskmaps │ │ ├── __init__.py │ │ └── distance_taskmaps.py ├── urdf │ ├── franka_panda │ │ ├── LICENSE.txt │ │ └── panda.urdf │ ├── meshes │ │ ├── collision │ │ │ ├── finger.obj │ │ │ ├── hand.obj │ │ │ ├── link0.obj │ │ │ ├── link1.obj │ │ │ ├── link2.obj │ │ │ ├── link3.obj │ │ │ ├── link4.obj │ │ │ ├── link5.obj │ │ │ ├── link6.mtl │ │ │ ├── link6.obj │ │ │ └── link7.obj │ │ └── visual │ │ │ ├── colors.png │ │ │ ├── finger.mtl │ │ │ ├── finger.obj │ │ │ ├── hand.mtl │ │ │ ├── hand.obj │ │ │ ├── link1.mtl │ │ │ ├── link1.obj │ │ │ ├── link2.mtl │ │ │ ├── link2.obj │ │ │ ├── link3.mtl │ │ │ ├── link3.obj │ │ │ ├── link4.mtl │ │ │ ├── link4.obj │ │ │ ├── link5.mtl │ │ │ ├── link5.obj │ │ │ ├── link6.mtl │ │ │ ├── link6.obj │ │ │ └── visualShapeBench.json_0.json │ ├── panda.urdf │ └── three_link_planar_robot.urdf └── utils │ ├── __init__.py │ ├── bullet_utils.py │ ├── env_wrappers.py │ ├── np_utils.py │ ├── plot_configs.py │ ├── python_utils.py │ ├── rllib_utils.py │ ├── 
robot_config_utils.py │ ├── tf_transform_utils.py │ └── tf_utils.py └── startup.sh /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = 3 | robolearn 4 | 5 | omit = 6 | tests 7 | examples 8 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # vim 132 | *.sw* 133 | 134 | #hydra 135 | outputs/ 136 | multirun/ 137 | 138 | # vscode 139 | .vscode/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 UW Robot Learning Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RMP2 2 | 3 | 4 | Code for R:SS 2021 paper *RMP2: A Structured Composable Policy Class for Robot Learning*. [[Paper](https://arxiv.org/abs/2103.05922)] 5 | 6 | ### Installation 7 | ``` 8 | git clone https://github.com/UWRobotLearning/rmp2.git 9 | cd rmp2 10 | conda env create -f environment.yml 11 | . startup.sh 12 | ``` 13 | 14 | ### Hand-designed RMP2 for Robot Control 15 | To run a goal reaching task for a Franka robot: 16 | ``` 17 | python examples/rmp2/rmp2_franka.py 18 | ``` 19 | 20 | To run a goal reaching task for a 3-link robot: 21 | ``` 22 | python examples/rmp2/rmp2_3link.py 23 | ``` 24 | 25 | ### Training RMP2 Policies with RL 26 | **Note:** The instruction below is for the 3-link robot. To run experiments with the franka robot, simply replace `3link` by `franka`. 
27 | 28 | To train an NN policy from scratch (without RMP2): 29 | ``` 30 | python run_3link_nn.py 31 | ``` 32 | 33 | To train an NN residual policy: 34 | ``` 35 | python run_3link_nn.py --env 3link_residual 36 | ``` 37 | 38 | To train an RMP residual policy: 39 | ``` 40 | python run_3link_residual_rmp.py 41 | ``` 42 | 43 | To restore training of a policy: 44 | ``` 45 | python restore_training.py --ckpt-path ~/ray_results/[EXPERIMENT_NAME]/[RUN_NAME]/ 46 | ``` 47 | 48 | To visualize the trained policy: 49 | ``` 50 | python examples/rl/run_policy_rollouts.py --ckpt-path ~/ray_results/[EXPERIMENT_NAME]/[RUN_NAME]/ 51 | ``` 52 | 53 | ### Citation 54 | If you use this source code, please cite the below article, 55 | 56 | ``` 57 | @inproceedings{Li-RSS-21, 58 | author = "Li, Anqi and Cheng, Ching-An and Rana, M Asif and Xie, Man and Van Wyk, Karl and Ratliff, Nathan and Boots, Byron", 59 | booktitle = "Robotics: Science and Systems ({R:SS})", 60 | title = "{{RMP}2: A Structured Composable Policy Class for Robot Learning}", 61 | year = "2021" 62 | } 63 | ``` 64 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: rmp2rl 2 | channels: 3 | - conda-forge 4 | - pytorch 5 | dependencies: 6 | - python=3.7.6 7 | - numpy==1.19.0 8 | - pytorch==1.4.0 9 | - torchvision 10 | - pip 11 | - pip: 12 | - catkin_pkg 13 | - scipy==1.4.1 14 | - tqdm==4.32.1 15 | - matplotlib==3.2.2 16 | - tensorflow-cpu==2.3.1 17 | - ray[rllib]==1.0.0 18 | - ray[debug]==1.0.0 19 | - tensorflow-probability==0.11.1 20 | - urdf-parser-py 21 | - torchviz 22 | - natsort 23 | - pybullet 24 | - pyyaml -------------------------------------------------------------------------------- /examples/rl/restore_training.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for restoring training of nn policy or residual nn policy for the 
3-link robot 3 | """ 4 | 5 | import argparse 6 | 7 | import ray 8 | from ray.tune.tune import run_experiments 9 | from rmp2.utils.rllib_utils import register_envs_and_models 10 | import numpy as np 11 | 12 | import natsort 13 | import os 14 | import json 15 | 16 | 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument("--run", type=str, default="PPO") 19 | parser.add_argument("--stop-iters", type=int, default=500) 20 | parser.add_argument("--checkpoint-freq", type=int, default=1) 21 | 22 | parser.add_argument("--ckpt-path", type=str) 23 | parser.add_argument("--n-workers", type=int, default=None) 24 | 25 | 26 | if __name__ == "__main__": 27 | args = parser.parse_args() 28 | 29 | if not args.ckpt_path.endswith(os.sep): 30 | args.ckpt_path += os.sep 31 | ckpt_dirs = [name for name in os.listdir(args.ckpt_path) if os.path.isdir(os.path.join(args.ckpt_path, name))] 32 | ckpt_dirs = natsort.natsorted(ckpt_dirs,reverse=True) 33 | ckpt_number = int(ckpt_dirs[0].split('_')[1]) 34 | ckpt_dir =os.path.join( 35 | args.ckpt_path, 36 | ckpt_dirs[0]) 37 | 38 | agent_ckpt_path = os.path.join( 39 | ckpt_dir, 40 | 'checkpoint-{}'.format(ckpt_number)) 41 | 42 | print('checkpoint number', ckpt_number) 43 | 44 | # load experiment config 45 | params_file = os.path.join(args.ckpt_path, "params.json") 46 | with open(params_file) as f: 47 | config = json.load(f) 48 | 49 | env = config['env'] 50 | if 'hidden_units' in config['model']['custom_model_config']['policy_config']: 51 | nn_size = config['model']['custom_model_config']['policy_config']['hidden_units'][0] 52 | else: 53 | nn_size = config['model']['custom_model_config']['policy_config']['units'][0] 54 | 55 | lr = config['lr'] 56 | clip_param = config['clip_param'] 57 | lambd = config['lambda'] 58 | batch_size = config['train_batch_size'] 59 | obs_num = config['env_config']['max_obstacle_num'] 60 | goal_angle_range = config['env_config']['goal_torus_angle_range'] / np.pi 61 | goal_minor_radius = 
config['env_config']['goal_torus_minor_radius'] 62 | 63 | experiment_name = args.ckpt_path.split(os.sep)[-3] 64 | 65 | if args.n_workers is not None: 66 | config['num_workers'] = args.n_workers 67 | 68 | # initialize ray 69 | ray.init() 70 | 71 | # register customized environments and models within ray 72 | register_envs_and_models() 73 | 74 | 75 | stop = { 76 | "training_iteration": args.stop_iters, 77 | } 78 | 79 | # experiment configuration 80 | experiments = { 81 | experiment_name: { # i.e. log to ~/ray_results/default 82 | "run": args.run, 83 | "checkpoint_freq": args.checkpoint_freq, 84 | "checkpoint_at_end": True, 85 | "stop": stop, 86 | "config": config, 87 | "restore": agent_ckpt_path, 88 | } 89 | } 90 | 91 | # resume training 92 | run_experiments(experiments, reuse_actors=True, concurrent=True) 93 | 94 | # shut down ray 95 | ray.shutdown() -------------------------------------------------------------------------------- /examples/rl/run_3link_nn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for training nn policy or residual nn policy for the 3-link robot 3 | """ 4 | 5 | import argparse 6 | 7 | import ray 8 | from ray.tune import grid_search 9 | from ray.tune.tune import run_experiments 10 | from rmp2.utils.rllib_utils import register_envs_and_models 11 | import numpy as np 12 | 13 | 14 | parser = argparse.ArgumentParser() 15 | # training config 16 | parser.add_argument("--run", type=str, default="PPO") 17 | parser.add_argument("--experiment-name", type=str, default=None) 18 | parser.add_argument("--stop-iters", type=int, default=500) 19 | parser.add_argument("--checkpoint-freq", type=int, default=1) 20 | parser.add_argument("--n-seeds", type=int, default=1) 21 | # env config 22 | parser.add_argument("--env", type=str, default='3link') # or 3link_residual for residual training 23 | parser.add_argument("--fixed-goal", action='store_true') 24 | parser.add_argument("--fixed-init", action='store_true') 
25 | parser.add_argument("--fixed-obs", action='store_true') 26 | parser.add_argument("--obs-free", action='store_true') 27 | parser.add_argument("--goal-reward-model", type=str, default="gaussian") 28 | parser.add_argument("--goal-angle-range", type=float, default=2) 29 | parser.add_argument("--goal-minor-radius", type=float, default=0.375) 30 | parser.add_argument("--obs-num", type=int, default=3) 31 | # hyperparameters 32 | parser.add_argument("--nn-size", type=int, default=256) 33 | parser.add_argument("--lr", type=float, default=5e-5) 34 | parser.add_argument("--clip-param", type=float, default=0.2) 35 | parser.add_argument("--lambd", type=float, default=0.99) 36 | parser.add_argument("--batch-size", type=int, default=67312) 37 | parser.add_argument("--sgd-minibatch-size", type=int, default=4096) 38 | # parallelism 39 | parser.add_argument("--n-workers", type=int, default=1) 40 | 41 | 42 | if __name__ == "__main__": 43 | args = parser.parse_args() 44 | if args.experiment_name is None: 45 | experiment_name = args.env + '-nn-' + str(args.nn_size) + '-lr-' + str(args.lr) + \ 46 | '-cp-' + str(args.clip_param) + '-l-' + str(args.lambd) + '-bs-' + str(args.batch_size) +\ 47 | '-obs-num' + str(args.obs_num) + '-ar-' + str(args.goal_angle_range) + \ 48 | '-mr-' + str(args.goal_minor_radius) 49 | if args.fixed_goal: 50 | experiment_name += '-fixed_goal' 51 | if args.fixed_init: 52 | experiment_name += '-fixed_init' 53 | if args.fixed_obs and not args.obs_free: 54 | experiment_name += '-fixed_obs' 55 | if args.obs_free: 56 | experiment_name += '-obs_free' 57 | if not args.goal_reward_model == "gaussian": 58 | experiment_name += '-' + args.goal_reward_model 59 | else: 60 | experiment_name = args.experiment_name 61 | 62 | # initialize ray 63 | ray.init() 64 | # register customized environments and models within ray 65 | register_envs_and_models() 66 | 67 | # environment configuration 68 | env_config = { 69 | "horizon": 1800, 70 | "max_obstacle_num": args.obs_num, 71 | 
"min_obstacle_num": args.obs_num, 72 | "goal_reward_model": args.goal_reward_model, 73 | "goal_torus_angle_center": np.pi, 74 | "goal_torus_angle_range": args.goal_angle_range * np.pi, 75 | "goal_torus_minor_radius": args.goal_minor_radius, 76 | } 77 | if args.fixed_goal: 78 | env_config['goal'] = [0.25, 0.5] 79 | if args.fixed_init: 80 | env_config['q_init'] = [0., 0., 0.] 81 | if args.fixed_obs and not args.obs_free: 82 | env_config['obstacle_configs'] = [[{'center': [0.0, 0.5], 'radius': 0.1}]] 83 | if args.obs_free: 84 | env_config['obstacle_configs'] = [[]] 85 | 86 | # experiment configuration 87 | config = { 88 | "env": args.env, 89 | "env_config": env_config, 90 | "model": { 91 | "custom_model": "diag_gaussian_model", 92 | "custom_model_config": { 93 | "policy_config": { 94 | "model": "mlp", 95 | "hidden_units": (args.nn_size, args.nn_size // 2), 96 | "activation": "relu", 97 | "hidden_layer_init_scale": 2.0, 98 | "output_layer_init_scale": 0.1, 99 | }, 100 | "value_config": { 101 | "model": "mlp", 102 | "hidden_units": (256, 128) 103 | }, 104 | "env_wrapper": args.env, 105 | "init_lstd": -1., 106 | "min_std": 1e-4, 107 | "dtype": "float32", 108 | }, 109 | }, 110 | "batch_mode": "complete_episodes", 111 | "rollout_fragment_length": 600, 112 | "train_batch_size": args.batch_size, 113 | "sgd_minibatch_size": args.sgd_minibatch_size, 114 | "lr": args.lr, 115 | "lambda": args.lambd, 116 | "clip_param": args.clip_param, 117 | "vf_clip_param": 500.0, 118 | "grad_clip": 1e6, 119 | "clip_actions": False, 120 | "num_workers": args.n_workers, 121 | "num_envs_per_worker": 1, 122 | "num_cpus_per_worker": 1, 123 | "num_cpus_for_driver": 1, 124 | "framework": "tf2", 125 | "seed": grid_search([(i + 1) * 100 for i in range(args.n_seeds)]) 126 | } 127 | 128 | stop = { 129 | "training_iteration": args.stop_iters, 130 | } 131 | 132 | experiments = { 133 | experiment_name: { 134 | "run": args.run, 135 | "checkpoint_freq": args.checkpoint_freq, 136 | "checkpoint_at_end": True, 
137 | "stop": stop, 138 | "config": config, 139 | } 140 | } 141 | 142 | # run experiments 143 | run_experiments(experiments, reuse_actors=True, concurrent=True) 144 | 145 | # shut down ray 146 | ray.shutdown() -------------------------------------------------------------------------------- /examples/rl/run_3link_residual_rmp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for training residual rmp policy for the 3-link robot 3 | """ 4 | 5 | import argparse 6 | 7 | import ray 8 | from ray.tune import grid_search 9 | from ray.tune.tune import run_experiments 10 | from rmp2.utils.rllib_utils import register_envs_and_models 11 | import numpy as np 12 | 13 | 14 | parser = argparse.ArgumentParser() 15 | # training config 16 | parser.add_argument("--run", type=str, default="PPO") 17 | parser.add_argument("--experiment-name", type=str, default=None) 18 | parser.add_argument("--stop-iters", type=int, default=500) 19 | parser.add_argument("--checkpoint-freq", type=int, default=1) 20 | parser.add_argument("--n-seeds", type=int, default=1) 21 | # env config 22 | parser.add_argument("--env", type=str, default='3link_rmp') 23 | parser.add_argument("--fixed-goal", action='store_true') 24 | parser.add_argument("--fixed-init", action='store_true') 25 | parser.add_argument("--fixed-obs", action='store_true') 26 | parser.add_argument("--obs-free", action='store_true') 27 | parser.add_argument("--goal-reward-model", type=str, default="gaussian") 28 | parser.add_argument("--goal-angle-range", type=float, default=2) 29 | parser.add_argument("--goal-minor-radius", type=float, default=0.375) 30 | parser.add_argument("--obs-num", type=int, default=3) 31 | # hyperparameters 32 | parser.add_argument("--nn-size", type=int, default=128) 33 | parser.add_argument("--activation", type=str, default='elu') 34 | parser.add_argument("--lr", type=float, default=5e-5) 35 | parser.add_argument("--clip-param", type=float, default=0.2) 36 | 
parser.add_argument("--lambd", type=float, default=0.99) 37 | parser.add_argument("--batch-size", type=int, default=67312) 38 | parser.add_argument("--sgd-minibatch-size", type=int, default=4096) 39 | parser.add_argument("--layer-init-scale", type=float, default=0.01) 40 | # parallelism 41 | parser.add_argument("--n-workers", type=int, default=1) 42 | 43 | 44 | if __name__ == "__main__": 45 | args = parser.parse_args() 46 | if args.experiment_name is None: 47 | experiment_name = args.env + '-flatrmp-' + str(args.nn_size) + '-lr-' + str(args.lr) + \ 48 | '-cp-' + str(args.clip_param) + '-l-' + str(args.lambd) + '-bs-' + str(args.batch_size) +\ 49 | '-obs-num' + str(args.obs_num) + '-ar-' + str(args.goal_angle_range) + \ 50 | '-mr-' + str(args.goal_minor_radius) + '-is-' + str(args.layer_init_scale) 51 | if args.fixed_goal: 52 | experiment_name += '-fixed_goal' 53 | if args.fixed_init: 54 | experiment_name += '-fixed_init' 55 | if args.fixed_obs and not args.obs_free: 56 | experiment_name += '-fixed_obs' 57 | if args.obs_free: 58 | experiment_name += '-obs_free' 59 | if not args.goal_reward_model == "gaussian": 60 | experiment_name += '-' + args.goal_reward_model 61 | else: 62 | experiment_name = args.experiment_name 63 | 64 | # initialize ray 65 | ray.init() 66 | # register customized environments and models within ray 67 | register_envs_and_models() 68 | 69 | # environment configuration 70 | env_config = { 71 | "horizon": 1800, 72 | "max_obstacle_num": args.obs_num, 73 | "min_obstacle_num": args.obs_num, 74 | "goal_reward_model": args.goal_reward_model, 75 | "goal_torus_angle_center": np.pi, 76 | "goal_torus_angle_range": args.goal_angle_range * np.pi, 77 | "goal_torus_minor_radius": args.goal_minor_radius, 78 | "dtype": 'float32', 79 | } 80 | 81 | if args.fixed_goal: 82 | env_config['goal'] = [0.25, 0.5] 83 | if args.fixed_init: 84 | env_config['q_init'] = [0., 0., 0.] 
85 | if args.fixed_obs and not args.obs_free: 86 | env_config['obstacle_configs'] = [[{'center': [0.0, 0.5], 'radius': 0.1}]] 87 | if args.obs_free: 88 | env_config['obstacle_configs'] = [[]] 89 | 90 | # rmp model configuration 91 | rmp_config = { 92 | 'model': 'flat_rmp', 93 | 'x_shape': 2, 94 | 'units': [args.nn_size, args.nn_size // 2], 95 | 'activation': args.activation, 96 | 'max_metric_value': 5., 97 | 'max_accel_value': 1., 98 | 'output_layer_init_scale': args.layer_init_scale, 99 | } 100 | 101 | # compute feature shape required by the RMP network 102 | max_obstacle_num = env_config['max_obstacle_num'] 103 | rmp_config['feature_shape'] = 2 + 3 * max_obstacle_num 104 | rmp_config['feature_keys'] = ['goal', 'obstacles'] 105 | 106 | # experiment configuration 107 | config = { 108 | "env": args.env, 109 | "env_config": env_config, 110 | "model": { 111 | "custom_model": "diag_gaussian_model", 112 | "custom_model_config": { 113 | "policy_config": rmp_config, 114 | "value_config": { 115 | "model": "mlp", 116 | "hidden_units": (256, 128) 117 | }, 118 | "env_wrapper": args.env, 119 | "init_lstd": -1., 120 | "min_std": 1e-4, 121 | "dtype": "float32", 122 | }, 123 | }, 124 | "batch_mode": "complete_episodes", 125 | "rollout_fragment_length": 600, 126 | "train_batch_size": args.batch_size, 127 | "sgd_minibatch_size": args.sgd_minibatch_size, 128 | "lr": args.lr, 129 | "lambda": args.lambd, 130 | "clip_param": args.clip_param, 131 | "vf_clip_param": 500.0, 132 | "grad_clip": 1e6, 133 | "clip_actions": False, 134 | "num_workers": args.n_workers, 135 | "num_envs_per_worker": 1, 136 | "num_cpus_per_worker": 1, 137 | "num_cpus_for_driver": 1, 138 | "framework": "tf2", 139 | "seed": grid_search([(i + 1) * 100 for i in range(args.n_seeds)]) 140 | } 141 | 142 | stop = { 143 | "training_iteration": args.stop_iters, 144 | } 145 | 146 | experiments = { 147 | experiment_name: { 148 | "run": args.run, 149 | "checkpoint_freq": args.checkpoint_freq, 150 | "checkpoint_at_end": True, 
151 | "stop": stop, 152 | "config": config, 153 | } 154 | } 155 | 156 | # run experiments 157 | run_experiments(experiments, reuse_actors=True, concurrent=True) 158 | 159 | # shut down ray 160 | ray.shutdown() 161 | -------------------------------------------------------------------------------- /examples/rl/run_franka_nn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for training nn policy or residual nn policy for the franka robot 3 | """ 4 | 5 | import argparse 6 | 7 | import ray 8 | from ray.tune import grid_search 9 | from ray.tune.tune import run_experiments 10 | from rmp2.utils.rllib_utils import register_envs_and_models 11 | 12 | 13 | parser = argparse.ArgumentParser() 14 | # training config 15 | parser.add_argument("--run", type=str, default="PPO") 16 | parser.add_argument("--experiment-name", type=str, default=None) 17 | parser.add_argument("--stop-iters", type=int, default=500) 18 | parser.add_argument("--checkpoint-freq", type=int, default=1) 19 | parser.add_argument("--n-seeds", type=int, default=1) 20 | # env config 21 | parser.add_argument("--env", type=str, default='franka') # or franka_residual for residual learning 22 | parser.add_argument("--obs-num", type=int, default=3) 23 | # hyperparameters 24 | parser.add_argument("--nn-size", type=int, default=512) 25 | parser.add_argument("--lr", type=float, default=5e-5) 26 | parser.add_argument("--clip-param", type=float, default=0.2) 27 | parser.add_argument("--lambd", type=float, default=0.99) 28 | parser.add_argument("--batch-size", type=int, default=67312) # 336560 29 | parser.add_argument("--sgd-minibatch-size", type=int, default=4096) # 336560 30 | # parallelism 31 | parser.add_argument("--n-workers", type=int, default=1) 32 | 33 | 34 | if __name__ == "__main__": 35 | args = parser.parse_args() 36 | if args.experiment_name is None: 37 | experiment_name = args.env + '-nn-' + str(args.nn_size) + '-lr-' + str(args.lr) + \ 38 | '-cp-' + 
str(args.clip_param) + '-l-' + str(args.lambd) + '-bs-' + str(args.batch_size) +\ 39 | '-obs-num-' + str(args.obs_num) 40 | else: 41 | experiment_name = args.experiment_name 42 | 43 | # initialize ray 44 | ray.init() 45 | # register customized environments and models within ray 46 | register_envs_and_models() 47 | 48 | # environment configuration 49 | env_config = { 50 | "horizon": 1800, 51 | "max_obstacle_num": args.obs_num, 52 | "min_obstacle_num": args.obs_num, 53 | 'q_init': [ 0.0000, -0.7854, 0.0000, -2.4435, 0.0000, 1.6581, 0.75], 54 | } 55 | 56 | # experiment configuration 57 | config = { 58 | "env": args.env, 59 | "env_config": env_config, 60 | "model": { 61 | "custom_model": "diag_gaussian_model", 62 | "custom_model_config": { 63 | "policy_config": { 64 | "model": "mlp", 65 | "hidden_units": (args.nn_size, args.nn_size // 2), 66 | "activation": "relu", 67 | "hidden_layer_init_scale": 2.0, 68 | "output_layer_init_scale": 0.1, 69 | }, 70 | "value_config": { 71 | "model": "mlp", 72 | "hidden_units": (256, 128) 73 | }, 74 | "env_wrapper": args.env, 75 | "init_lstd": -1., 76 | "min_std": 1e-4, 77 | "dtype": "float32", 78 | }, 79 | }, 80 | "batch_mode": "complete_episodes", 81 | "rollout_fragment_length": 600, 82 | "train_batch_size": args.batch_size, 83 | "sgd_minibatch_size": args.sgd_minibatch_size, 84 | "lr": args.lr, 85 | "lambda": args.lambd, 86 | "clip_param": args.clip_param, 87 | "vf_clip_param": 500.0, 88 | "grad_clip": 1e6, 89 | "clip_actions": False, 90 | "num_workers": args.n_workers, 91 | "num_envs_per_worker": 1, 92 | "num_cpus_per_worker": 1, 93 | "num_cpus_for_driver": 1, 94 | "framework": "tf2", 95 | "seed": grid_search([(i + 1) * 100 for i in range(args.n_seeds)]) 96 | } 97 | 98 | stop = { 99 | "training_iteration": args.stop_iters, 100 | } 101 | 102 | experiments = { 103 | experiment_name: { 104 | "run": args.run, 105 | "checkpoint_freq": args.checkpoint_freq, 106 | "checkpoint_at_end": True, 107 | "stop": stop, 108 | "config": config, 109 | 
} 110 | } 111 | 112 | # run experiments 113 | run_experiments(experiments, reuse_actors=True, concurrent=True) 114 | 115 | # shut down ray 116 | ray.shutdown() -------------------------------------------------------------------------------- /examples/rl/run_franka_residual_rmp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for training residual rmp policy for the franka robot 3 | """ 4 | 5 | import argparse 6 | 7 | import ray 8 | from ray.tune import grid_search 9 | from ray.tune.tune import run_experiments 10 | from rmp2.utils.rllib_utils import register_envs_and_models 11 | 12 | 13 | parser = argparse.ArgumentParser() 14 | # training config 15 | parser.add_argument("--run", type=str, default="PPO") 16 | parser.add_argument("--experiment-name", type=str, default=None) 17 | parser.add_argument("--stop-iters", type=int, default=500) 18 | parser.add_argument("--checkpoint-freq", type=int, default=1) 19 | parser.add_argument("--n-seeds", type=int, default=1) 20 | # env config 21 | parser.add_argument("--env", type=str, default='franka_rmp') 22 | parser.add_argument("--obs-num", type=int, default=3) 23 | # hyperparameters 24 | parser.add_argument("--nn-size", type=int, default=256) 25 | parser.add_argument("--activation", type=str, default='elu') 26 | parser.add_argument("--lr", type=float, default=5e-5) 27 | parser.add_argument("--clip-param", type=float, default=0.2) 28 | parser.add_argument("--lambd", type=float, default=0.99) 29 | parser.add_argument("--batch-size", type=int, default=67312) 30 | parser.add_argument("--sgd-minibatch-size", type=int, default=4096) 31 | parser.add_argument("--layer-init-scale", type=float, default=0.01) 32 | # parallelism 33 | parser.add_argument("--n-workers", type=int, default=1) 34 | 35 | 36 | if __name__ == "__main__": 37 | args = parser.parse_args() 38 | if args.experiment_name is None: 39 | experiment_name = args.env + '-flatrmp-' + str(args.nn_size) + '-lr-' + 
str(args.lr) + \ 40 | '-cp-' + str(args.clip_param) + '-l-' + str(args.lambd) + '-bs-' + str(args.batch_size) +\ 41 | '-obs-num' + str(args.obs_num) + '-is-' + str(args.layer_init_scale) 42 | else: 43 | experiment_name = args.experiment_name 44 | 45 | # initialize ray 46 | ray.init() 47 | # register customized environments and models within ray 48 | register_envs_and_models() 49 | 50 | # environment configuration 51 | env_config = { 52 | "horizon": 1800, 53 | "max_obstacle_num": args.obs_num, 54 | "min_obstacle_num": args.obs_num, 55 | 'q_init': [ 0.0000, -0.7854, 0.0000, -2.4435, 0.0000, 1.6581, 0.75], 56 | "dtype": 'float32', 57 | } 58 | 59 | # rmp model configuration 60 | rmp_config = { 61 | 'model': 'flat_rmp', 62 | 'x_shape': 3, 63 | 'units': [args.nn_size, args.nn_size // 2], 64 | 'activation': args.activation, 65 | 'max_metric_value': 5., 66 | 'max_accel_value': 1., 67 | 'output_layer_init_scale': args.layer_init_scale, 68 | } 69 | 70 | 71 | # compute feature shape required by the RMP network 72 | max_obstacle_num = env_config['max_obstacle_num'] 73 | rmp_config['feature_shape'] = 3 + 4 * max_obstacle_num 74 | rmp_config['feature_keys'] = ['goal', 'obstacles'] 75 | 76 | # experiment configuration 77 | config = { 78 | "env": args.env, 79 | "env_config": env_config, 80 | "model": { 81 | "custom_model": "diag_gaussian_model", 82 | "custom_model_config": { 83 | "policy_config": rmp_config, 84 | "value_config": { 85 | "model": "mlp", 86 | "hidden_units": (256, 128) 87 | }, 88 | "env_wrapper": args.env, 89 | "init_lstd": -1., 90 | "min_std": 1e-4, 91 | "dtype": "float32", 92 | }, 93 | }, 94 | "batch_mode": "complete_episodes", 95 | "rollout_fragment_length": 600, 96 | "train_batch_size": args.batch_size, 97 | "sgd_minibatch_size": args.sgd_minibatch_size, 98 | "lr": args.lr, 99 | "lambda": args.lambd, 100 | "clip_param": args.clip_param, 101 | "vf_clip_param": 500.0, 102 | "grad_clip": 1e6, 103 | "clip_actions": False, 104 | "num_workers": args.n_workers, 105 | 
"num_envs_per_worker": 1, 106 | "num_cpus_per_worker": 1, 107 | "num_cpus_for_driver": 1, 108 | "framework": "tf2", 109 | "seed": grid_search([(i + 1) * 100 for i in range(args.n_seeds)]) 110 | } 111 | 112 | stop = { 113 | "training_iteration": args.stop_iters, 114 | } 115 | 116 | experiments = { 117 | experiment_name: { 118 | "run": args.run, 119 | "checkpoint_freq": args.checkpoint_freq, 120 | "checkpoint_at_end": True, 121 | "stop": stop, 122 | "config": config, 123 | } 124 | } 125 | 126 | # run experiments 127 | run_experiments(experiments, reuse_actors=True, concurrent=True) 128 | 129 | # shut down ray 130 | ray.shutdown() 131 | -------------------------------------------------------------------------------- /examples/rl/run_policy_rollouts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for visualizing the learned policies 3 | """ 4 | 5 | import os 6 | import json 7 | import argparse 8 | 9 | import ray 10 | from ray.tune.registry import get_trainable_cls 11 | from rmp2.utils.rllib_utils import register_envs_and_models 12 | from ray.tune.registry import ENV_CREATOR, _global_registry 13 | import numpy as np 14 | from tqdm import tqdm 15 | import natsort 16 | 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument("--ckpt-path", type=str) 20 | parser.add_argument("--ckpt-number", type=int, default=-1) 21 | parser.add_argument("--horizon", type=int, default=1000) 22 | parser.add_argument("--n-trials", type=int, default=1) 23 | 24 | 25 | if __name__ == "__main__": 26 | args = parser.parse_args() 27 | 28 | # if checkpoint number is not given, use the latest checkpoint 29 | if args.ckpt_number < 0: 30 | print('using the latest checkpoint') 31 | ckpt_dirs = [name for name in os.listdir(args.ckpt_path) if os.path.isdir(os.path.join(args.ckpt_path, name))] 32 | ckpt_dirs = natsort.natsorted(ckpt_dirs,reverse=True) 33 | ckpt_number = int(ckpt_dirs[0].split('_')[1]) 34 | ckpt_dir =os.path.join( 35 | 
args.ckpt_path, 36 | ckpt_dirs[0]) 37 | # otherwise load the specified checkpoint unless it is not found 38 | else: 39 | ckpt_dir = os.path.join( 40 | args.ckpt_path, 41 | 'checkpoint_{}'.format(args.ckpt_number)) 42 | if not os.path.isdir(ckpt_dir): 43 | Warning('checkpoint number not found, use the latest one') 44 | ckpt_dirs = [name for name in os.listdir(args.ckpt_path) if os.path.isdir(os.path.join(args.ckpt_path, name))] 45 | ckpt_dirs = natsort.natsorted(ckpt_dirs,reverse=True) 46 | ckpt_number = int(ckpt_dirs[0].split('_')[1]) 47 | ckpt_dir =os.path.join( 48 | args.ckpt_path, 49 | ckpt_dirs[0]) 50 | print('using the latest checkpoint') 51 | else: 52 | ckpt_number = args.ckpt_number 53 | print('using checkpoint', ckpt_number) 54 | 55 | agent_ckpt_path = os.path.join( 56 | ckpt_dir, 57 | 'checkpoint-{}'.format(ckpt_number)) 58 | 59 | # load experiment config 60 | params_file = os.path.join(args.ckpt_path, "params.json") 61 | with open(params_file) as f: 62 | config = json.load(f) 63 | # disable parallelism 64 | config['num_workers'] = 0 65 | 66 | # initialize ray 67 | ray.init() 68 | 69 | # register customized environments and models within ray 70 | register_envs_and_models() 71 | 72 | # get environment creator and enable rendering 73 | env_creator = _global_registry.get(ENV_CREATOR, config['env']) 74 | env_config = config['env_config'].copy() 75 | env_config['render'] = True 76 | 77 | # restored the learned policy 78 | cls = get_trainable_cls("PPO") 79 | agent = cls(config, config['env']) 80 | agent.restore(agent_ckpt_path) 81 | 82 | policy = agent.workers.local_worker().get_policy() 83 | agent.workers.local_worker().env.__del__() 84 | model = policy.model 85 | 86 | # create a environment 87 | env = env_creator(env_config) 88 | env.seed(100) 89 | env.reset() 90 | env.step(np.zeros(env.action_space.shape[0],)) 91 | 92 | input('waiting for user input...') 93 | 94 | # roll out the policy for n trials 95 | for _ in range(args.n_trials): 96 | state = env.reset() 
97 | rewards = [] 98 | for i in tqdm(range(args.horizon)): 99 | action = policy.compute_actions([state]) 100 | action = action[0][0] 101 | state, reward, done, _ = env.step(action) 102 | rewards.append(reward) 103 | if done: 104 | break 105 | 106 | print('total reward from the current episode: ', np.sum(rewards)) 107 | 108 | episode_len = i + 1 -------------------------------------------------------------------------------- /examples/rmp2/rmp2_3link.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for rolling out a hand-designed rmp2 policy on the 3-link robot 3 | rmp parameters are given in rmp2/configs/3link_config.yaml 4 | """ 5 | 6 | from rmp2.rmpgraph import RobotRMPGraph 7 | from rmp2.envs import ThreeLinkEnv 8 | from rmp2.utils.env_wrappers import ThreeLinkFullRMPWrapper 9 | import tensorflow as tf 10 | 11 | n_trials = 10 12 | seed = 15 13 | dtype = "float32" 14 | 15 | env_wrapper = ThreeLinkFullRMPWrapper(dtype=dtype) 16 | rmp_graph = RobotRMPGraph(robot_name="3link", workspace_dim=2, dtype=dtype) 17 | 18 | config = { 19 | "goal": [-0.5, 0.1], 20 | "horizon": 1800, 21 | "action_repeat": 3, 22 | "render": True, 23 | } 24 | 25 | goal = tf.convert_to_tensor([config['goal']]) 26 | 27 | def policy(state): 28 | ts_state = tf.convert_to_tensor([state]) 29 | policy_input = env_wrapper.obs_to_policy_input(ts_state) 30 | policy_input['goal'] = goal 31 | ts_action = rmp_graph(**policy_input) 32 | action = ts_action[0].numpy() 33 | return action 34 | 35 | env = ThreeLinkEnv(config) 36 | env.seed(seed) 37 | state = env.reset() 38 | action = policy(state) 39 | 40 | for _ in range(n_trials): 41 | state = env.reset() 42 | while True: 43 | action = policy(state) 44 | state, reward, done, _ = env.step(action) 45 | if done: 46 | break 47 | 48 | -------------------------------------------------------------------------------- /examples/rmp2/rmp2_franka.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Script for rolling out a hand-designed rmp2 policy on the franka robot 3 | rmp parameters are given in rmp2/configs/franka_config.yaml 4 | """ 5 | 6 | from rmp2.rmpgraph import RobotRMPGraph 7 | from rmp2.envs import FrankaEnv 8 | from rmp2.utils.env_wrappers import FrankaFullRMPWrapper 9 | import tensorflow as tf 10 | 11 | n_trials = 10 12 | seed = 15 13 | dtype = "float32" 14 | 15 | env_wrapper = FrankaFullRMPWrapper(dtype=dtype) 16 | rmp_graph = RobotRMPGraph(robot_name="franka", dtype=dtype) 17 | 18 | config = { 19 | "goal": [0.5, -0.5, 0.5], 20 | "horizon": 1800, 21 | "action_repeat": 3, 22 | "q_init": [ 0.0000, -0.7854, 0.0000, -2.4435, 0.0000, 1.6581, 0.75], 23 | "render": True, 24 | } 25 | 26 | goal = tf.convert_to_tensor([config['goal']]) 27 | 28 | def policy(state): 29 | ts_state = tf.convert_to_tensor([state]) 30 | policy_input = env_wrapper.obs_to_policy_input(ts_state) 31 | policy_input['goal'] = goal 32 | ts_action = rmp_graph(**policy_input) 33 | action = ts_action[0].numpy() 34 | return action 35 | 36 | env = FrankaEnv(config) 37 | env.seed(seed) 38 | state = env.reset() 39 | action = policy(state) 40 | 41 | for _ in range(n_trials): 42 | state = env.reset() 43 | while True: 44 | action = policy(state) 45 | state, reward, done, _ = env.step(action) 46 | if done: 47 | break 48 | 49 | -------------------------------------------------------------------------------- /rmp2/configs/3link_config.yaml: -------------------------------------------------------------------------------- 1 | # rmp configuration of the hand-designed rmp2 policy for 3-link robot 2 | 3 | robot_name: '3link' 4 | 5 | eef_link: 'link4' 6 | 7 | default_q: [0., 0., 0.] 8 | 9 | joint_limit_buffers: [0., 0., 0.] 
10 | 11 | rmp_params: 12 | joint_velocity_cap_rmp: 13 | max_velocity: 1 14 | velocity_damping_region: 0.15 15 | damping_gain: 5.0 16 | metric_weight: 0.05 17 | target_rmp: 18 | accel_p_gain: 50. 19 | accel_d_gain: 70. 20 | accel_norm_eps: .075 21 | metric_alpha_length_scale: .05 22 | min_metric_alpha: .03 23 | max_metric_scalar: 1. 24 | min_metric_scalar: 0.5 25 | proximity_metric_boost_scalar: 20. 26 | proximity_metric_boost_length_scale: .02 27 | collision_rmp: 28 | margin: 0. 29 | damping_gain: 50. 30 | damping_std_dev: .04 31 | damping_robustness_eps: .01 32 | damping_velocity_gate_length_scale: .01 33 | repulsion_gain: 800. 34 | repulsion_std_dev: .01 35 | metric_modulation_radius: 0.5 36 | metric_scalar: 1. 37 | metric_exploder_std_dev: .02 38 | metric_exploder_eps: .001 39 | damping_rmp: 40 | accel_d_gain: 30. 41 | metric_scalar: 0.005 42 | inertia: 0.3 43 | 44 | body_obstacles: [] 45 | 46 | arm_collision_controllers: 47 | - segment: [link1, link2] 48 | interpolation_pts: 5 49 | radius: .025 50 | - segment: [link2, link3] 51 | interpolation_pts: 5 52 | radius: .025 53 | - segment: [link3, link4] 54 | interpolation_pts: 5 55 | radius: .025 56 | -------------------------------------------------------------------------------- /rmp2/configs/3link_residual_config.yaml: -------------------------------------------------------------------------------- 1 | # rmp configuration of the rmp2 policy for 3-link robot 2 | # in residual nn policy learning 3 | 4 | robot_name: '3link' 5 | 6 | eef_link: 'link4' 7 | 8 | default_q: [0., 0., 0.] 9 | 10 | joint_limit_buffers: [0., 0., 0.] 11 | 12 | rmp_params: 13 | joint_velocity_cap_rmp: 14 | max_velocity: 1 15 | velocity_damping_region: 0.1 16 | damping_gain: 5.0 17 | metric_weight: 0.05 18 | target_rmp: 19 | accel_p_gain: 5. 20 | accel_d_gain: 10. 21 | accel_norm_eps: .075 22 | metric_alpha_length_scale: .05 23 | min_metric_alpha: .03 24 | max_metric_scalar: 1. 25 | min_metric_scalar: 0.5 26 | proximity_metric_boost_scalar: 5. 
27 | proximity_metric_boost_length_scale: .02 28 | collision_rmp: 29 | margin: 0. 30 | damping_gain: 50. 31 | damping_std_dev: .04 32 | damping_robustness_eps: .01 33 | damping_velocity_gate_length_scale: .01 34 | repulsion_gain: 800. 35 | repulsion_std_dev: .01 36 | metric_modulation_radius: 0.5 37 | metric_scalar: 1. 38 | metric_exploder_std_dev: .02 39 | metric_exploder_eps: .001 40 | damping_rmp: 41 | accel_d_gain: 5. 42 | metric_scalar: 0.005 43 | inertia: 0.3 44 | 45 | body_obstacles: [] 46 | 47 | arm_collision_controllers: 48 | - segment: [link1, link2] 49 | interpolation_pts: 5 50 | radius: .025 51 | - segment: [link2, link3] 52 | interpolation_pts: 5 53 | radius: .025 54 | - segment: [link3, link4] 55 | interpolation_pts: 5 56 | radius: .025 57 | -------------------------------------------------------------------------------- /rmp2/configs/3link_residual_rmp_config.yaml: -------------------------------------------------------------------------------- 1 | # rmp configuration of the rmp2 policy for 3-link robot 2 | # in residual rmp policy learning 3 | 4 | robot_name: '3link' 5 | 6 | eef_link: 'link4' 7 | 8 | default_q: [0., 0., 0.] 9 | 10 | joint_limit_buffers: [0., 0., 0.] 11 | 12 | rmp_params: 13 | joint_velocity_cap_rmp: 14 | max_velocity: 1 15 | velocity_damping_region: 0.1 16 | damping_gain: 5.0 17 | metric_weight: 0.05 18 | collision_rmp: 19 | margin: 0. 20 | damping_gain: 50. 21 | damping_std_dev: .04 22 | damping_robustness_eps: .01 23 | damping_velocity_gate_length_scale: .01 24 | repulsion_gain: 800. 25 | repulsion_std_dev: .01 26 | metric_modulation_radius: 0.5 27 | metric_scalar: 1. 28 | metric_exploder_std_dev: .02 29 | metric_exploder_eps: .001 30 | damping_rmp: 31 | accel_d_gain: 5. 32 | metric_scalar: 0.005 33 | inertia: 0.3 34 | external_rmps: 35 | - name: 'nn_attractor' 36 | link: 'link4' 37 | handcrafted_rmp: 'target_rmp' 38 | handcrafted_rmp_config: 39 | accel_p_gain: 5. 40 | accel_d_gain: 10. 
41 | accel_norm_eps: .075 42 | metric_alpha_length_scale: .05 43 | min_metric_alpha: .03 44 | max_metric_scalar: 1. 45 | min_metric_scalar: 0.5 46 | proximity_metric_boost_scalar: 5. 47 | proximity_metric_boost_length_scale: .02 48 | 49 | body_obstacles: [] 50 | 51 | arm_collision_controllers: 52 | - segment: [link1, link2] 53 | interpolation_pts: 5 54 | radius: .025 55 | - segment: [link2, link3] 56 | interpolation_pts: 5 57 | radius: .025 58 | - segment: [link3, link4] 59 | interpolation_pts: 5 60 | radius: .025 61 | -------------------------------------------------------------------------------- /rmp2/configs/franka_config.yaml: -------------------------------------------------------------------------------- 1 | # rmp configuration of the hand-designed rmp2 policy for franka robot 2 | 3 | robot_name: 'franka' 4 | 5 | eef_link: 'panda_grasptarget' 6 | 7 | default_q: [ 8 | 0.0000, -0.7854, 0.0000, -2.4435, 0.0000, 1.6581, 0.75 9 | ] 10 | 11 | 12 | joint_limit_buffers: [.01, .03, .01, .01, .01, .01, .01] 13 | 14 | rmp_params: 15 | cspace_target_rmp: 16 | metric_scalar: 0.005 17 | position_gain: 100. 18 | damping_gain: 50. 19 | robust_position_term_thresh: .5 20 | inertia: 0.0001 21 | joint_limit_rmp: 22 | metric_scalar: 0.1 23 | metric_length_scale: .01 24 | metric_exploder_eps: .001 25 | metric_velocity_gate_length_scale: .01 26 | accel_damper_gain: 200. 27 | accel_potential_gain: 1. 28 | accel_potential_exploder_length_scale: .1 29 | accel_potential_exploder_eps: .01 30 | joint_velocity_cap_rmp: 31 | max_velocity: 1.7 32 | velocity_damping_region: 0.15 33 | damping_gain: 5.0 34 | metric_weight: 0.05 35 | target_rmp: 36 | accel_p_gain: 50. 37 | accel_d_gain: 70. 38 | accel_norm_eps: .075 39 | metric_alpha_length_scale: .05 40 | min_metric_alpha: .03 41 | max_metric_scalar: 1.0 42 | min_metric_scalar: 0.5 43 | proximity_metric_boost_scalar: 3. 44 | proximity_metric_boost_length_scale: .02 45 | collision_rmp: 46 | margin: 0. 47 | damping_gain: 50. 
48 | damping_std_dev: .04 49 | damping_robustness_eps: .01 50 | damping_velocity_gate_length_scale: .01 51 | repulsion_gain: 800. 52 | repulsion_std_dev: .01 53 | metric_modulation_radius: .5 54 | metric_scalar: 1. 55 | metric_exploder_std_dev: .02 56 | metric_exploder_eps: .001 57 | damping_rmp: 58 | accel_d_gain: 30. 59 | metric_scalar: 0.005 60 | inertia: 0.3 61 | 62 | body_obstacles: [] 63 | 64 | arm_collision_controllers: 65 | - segment: [panda_link1, panda_link3] 66 | interpolation_pts: 10 67 | radius: .1 68 | - segment: [panda_forearm_end_pt, panda_forearm_mid_pt] 69 | interpolation_pts: 5 70 | radius: .1 71 | - segment: [panda_forearm_mid_pt_shifted, panda_forearm_distal] 72 | interpolation_pts: 5 73 | radius: .075 74 | - segment: [panda_wrist_end_pt, panda_link8] 75 | interpolation_pts: 8 76 | radius: .05 77 | - segment: [panda_face_back_left, panda_face_back_right] 78 | interpolation_pts: 5 79 | radius: .03 80 | suppression_name: right_gripper 81 | - segment: [panda_face_left, panda_face_right] 82 | interpolation_pts: 5 83 | radius: .02 84 | suppression_name: right_gripper 85 | - segment: [panda_hand, panda_leftfingertip] 86 | interpolation_pts: 5 87 | radius: .01 88 | suppression_name: right_gripper 89 | - segment: [panda_hand, panda_rightfingertip] 90 | interpolation_pts: 5 91 | radius: .01 92 | suppression_name: right_gripper 93 | -------------------------------------------------------------------------------- /rmp2/configs/franka_residual_config.yaml: -------------------------------------------------------------------------------- 1 | # rmp configuration of the rmp2 policy for franka robot 2 | # in residual nn policy learning 3 | 4 | robot_name: 'franka' 5 | 6 | eef_link: 'panda_grasptarget' 7 | 8 | default_q: [ 9 | 0.0000, -0.7854, 0.0000, -2.4435, 0.0000, 1.6581, 0.75 10 | ] 11 | 12 | 13 | joint_limit_buffers: [.01, .03, .01, .01, .01, .01, .01] 14 | 15 | rmp_params: 16 | cspace_target_rmp: 17 | metric_scalar: 0.005 18 | position_gain: 1. 
19 | damping_gain: 2. 20 | robust_position_term_thresh: .5 21 | inertia: 0.0001 22 | joint_limit_rmp: 23 | metric_scalar: 0.1 24 | metric_length_scale: .01 25 | metric_exploder_eps: .001 26 | metric_velocity_gate_length_scale: .01 27 | accel_damper_gain: 200. 28 | accel_potential_gain: 1. 29 | accel_potential_exploder_length_scale: .1 30 | accel_potential_exploder_eps: .01 31 | joint_velocity_cap_rmp: 32 | max_velocity: 1.7 33 | velocity_damping_region: 0.15 34 | damping_gain: 5.0 35 | metric_weight: 0.05 36 | target_rmp: 37 | accel_p_gain: 0.5 38 | accel_d_gain: 0.5 39 | accel_norm_eps: .075 40 | metric_alpha_length_scale: .05 41 | min_metric_alpha: .03 42 | max_metric_scalar: 0.5 43 | min_metric_scalar: 0.5 44 | proximity_metric_boost_scalar: 1. 45 | proximity_metric_boost_length_scale: .02 46 | collision_rmp: 47 | margin: 0. 48 | damping_gain: 50. 49 | damping_std_dev: .04 50 | damping_robustness_eps: .01 51 | damping_velocity_gate_length_scale: .01 52 | repulsion_gain: 800. 53 | repulsion_std_dev: .01 54 | metric_modulation_radius: .5 55 | metric_scalar: 1. 56 | metric_exploder_std_dev: .02 57 | metric_exploder_eps: .001 58 | damping_rmp: 59 | accel_d_gain: 1. 
60 | metric_scalar: 0.005 61 | inertia: 0.3 62 | 63 | body_obstacles: [] 64 | 65 | arm_collision_controllers: 66 | - segment: [panda_link1, panda_link3] 67 | interpolation_pts: 10 68 | radius: .1 69 | - segment: [panda_forearm_end_pt, panda_forearm_mid_pt] 70 | interpolation_pts: 5 71 | radius: .1 72 | - segment: [panda_forearm_mid_pt_shifted, panda_forearm_distal] 73 | interpolation_pts: 5 74 | radius: .075 75 | - segment: [panda_wrist_end_pt, panda_link8] 76 | interpolation_pts: 8 77 | radius: .05 78 | - segment: [panda_face_back_left, panda_face_back_right] 79 | interpolation_pts: 5 80 | radius: .03 81 | suppression_name: right_gripper 82 | - segment: [panda_face_left, panda_face_right] 83 | interpolation_pts: 5 84 | radius: .02 85 | suppression_name: right_gripper 86 | - segment: [panda_hand, panda_leftfingertip] 87 | interpolation_pts: 5 88 | radius: .01 89 | suppression_name: right_gripper 90 | - segment: [panda_hand, panda_rightfingertip] 91 | interpolation_pts: 5 92 | radius: .01 93 | suppression_name: right_gripper 94 | -------------------------------------------------------------------------------- /rmp2/configs/franka_residual_rmp_config.yaml: -------------------------------------------------------------------------------- 1 | # rmp configuration of the rmp2 policy for franka robot 2 | # in residual rmp policy learning 3 | 4 | robot_name: 'franka' 5 | 6 | eef_link: 'panda_grasptarget' 7 | 8 | default_q: [ 9 | 0.0000, -0.7854, 0.0000, -2.4435, 0.0000, 1.6581, 0.75 10 | ] 11 | 12 | 13 | joint_limit_buffers: [.01, .03, .01, .01, .01, .01, .01] 14 | 15 | rmp_params: 16 | cspace_target_rmp: 17 | metric_scalar: 0.005 18 | position_gain: 1. 19 | damping_gain: 2. 20 | robust_position_term_thresh: .5 21 | inertia: 0.0001 22 | joint_limit_rmp: 23 | metric_scalar: 0.1 24 | metric_length_scale: .01 25 | metric_exploder_eps: .001 26 | metric_velocity_gate_length_scale: .01 27 | accel_damper_gain: 200. 28 | accel_potential_gain: 1. 
29 | accel_potential_exploder_length_scale: .1 30 | accel_potential_exploder_eps: .01 31 | joint_velocity_cap_rmp: 32 | max_velocity: 1.7 33 | velocity_damping_region: 0.15 34 | damping_gain: 5.0 35 | metric_weight: 0.05 36 | collision_rmp: 37 | margin: 0. 38 | damping_gain: 50. 39 | damping_std_dev: .04 40 | damping_robustness_eps: .01 41 | damping_velocity_gate_length_scale: .01 42 | repulsion_gain: 800. 43 | repulsion_std_dev: .01 44 | metric_modulation_radius: .5 45 | metric_scalar: 1. 46 | metric_exploder_std_dev: .02 47 | metric_exploder_eps: .001 48 | damping_rmp: 49 | accel_d_gain: 1. 50 | metric_scalar: 0.005 51 | inertia: 0.3 52 | external_rmps: 53 | - name: 'nn_attractor' 54 | link: 'panda_grasptarget' 55 | handcrafted_rmp: 'target_rmp' 56 | handcrafted_rmp_config: 57 | accel_p_gain: 0.5 58 | accel_d_gain: 0.5 59 | accel_norm_eps: .075 60 | metric_alpha_length_scale: .05 61 | min_metric_alpha: .03 62 | max_metric_scalar: 0.5 63 | min_metric_scalar: 0.5 64 | proximity_metric_boost_scalar: 1. 
65 | proximity_metric_boost_length_scale: .02 66 | 67 | body_obstacles: [] 68 | 69 | arm_collision_controllers: 70 | - segment: [panda_link1, panda_link3] 71 | interpolation_pts: 10 72 | radius: .1 73 | - segment: [panda_forearm_end_pt, panda_forearm_mid_pt] 74 | interpolation_pts: 5 75 | radius: .1 76 | - segment: [panda_forearm_mid_pt_shifted, panda_forearm_distal] 77 | interpolation_pts: 5 78 | radius: .075 79 | - segment: [panda_wrist_end_pt, panda_link8] 80 | interpolation_pts: 8 81 | radius: .05 82 | - segment: [panda_face_back_left, panda_face_back_right] 83 | interpolation_pts: 5 84 | radius: .03 85 | suppression_name: right_gripper 86 | - segment: [panda_face_left, panda_face_right] 87 | interpolation_pts: 5 88 | radius: .02 89 | suppression_name: right_gripper 90 | - segment: [panda_hand, panda_leftfingertip] 91 | interpolation_pts: 5 92 | radius: .01 93 | suppression_name: right_gripper 94 | - segment: [panda_hand, panda_rightfingertip] 95 | interpolation_pts: 5 96 | radius: .01 97 | suppression_name: right_gripper 98 | -------------------------------------------------------------------------------- /rmp2/envs/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from rmp2.envs.three_link_env import ThreeLinkEnv 3 | from rmp2.envs.three_link_residual_env import ThreeLinkResidualEnv 4 | from rmp2.envs.three_link_residual_rmp_env import ThreeLinkResidualRMPEnv 5 | from rmp2.envs.franka_env import FrankaEnv 6 | from rmp2.envs.franka_residual_env import FrankaResidualEnv 7 | from rmp2.envs.franka_residual_rmp_env import FrankaResidualRMPEnv -------------------------------------------------------------------------------- /rmp2/envs/franka_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base gym environment for franka robot 3 | """ 4 | 5 | from rmp2.envs.robot_env import RobotEnv 6 | from rmp2.utils.np_utils import sample_from_torus_3d 7 | from 
rmp2.utils.python_utils import merge_dicts 8 | from rmp2.utils.bullet_utils import add_goal, add_obstacle_ball 9 | import numpy as np 10 | 11 | DEFAULT_CONFIG = { 12 | # parameters for randomly generated goals 13 | "goal_torus_angle_center": 0., 14 | "goal_torus_angle_range": np.pi, 15 | "goal_torus_major_radius": 0.5, 16 | "goal_torus_minor_radius": 0.3, 17 | "goal_torus_height": 0.5, 18 | # parameters for randomly generated obstacles 19 | "obs_torus_angle_center": 0., 20 | "obs_torus_angle_range": np.pi, 21 | "obs_torus_major_radius": 0.5, 22 | "obs_torus_minor_radius": 0.3, 23 | "obs_torus_height": 0.5, 24 | # obstacle size 25 | "max_obstacle_radius": 0.1, 26 | "min_obstacle_radius": 0.05, 27 | # init min goal distance 28 | "initial_goal_distance_min": 0.5, 29 | 30 | } 31 | 32 | class FrankaEnv(RobotEnv): 33 | """ 34 | Base gym environment for franka robot 35 | """ 36 | def __init__(self, config=None): 37 | if config is not None: 38 | config = merge_dicts(DEFAULT_CONFIG, config) 39 | else: 40 | config = DEFAULT_CONFIG.copy() 41 | 42 | # random goal config 43 | self._goal_torus_angle_center = config["goal_torus_angle_center"] 44 | self._goal_torus_angle_range = config["goal_torus_angle_range"] 45 | self._goal_torus_major_radius = config["goal_torus_major_radius"] 46 | self._goal_torus_minor_radius = config["goal_torus_minor_radius"] 47 | self._goal_torus_height = config["goal_torus_height"] 48 | # random obstacle config 49 | self._obs_torus_angle_center = config["obs_torus_angle_center"] 50 | self._obs_torus_angle_range = config["obs_torus_angle_range"] 51 | self._obs_torus_major_radius = config["obs_torus_major_radius"] 52 | self._obs_torus_minor_radius = config["obs_torus_minor_radius"] 53 | self._obs_torus_height = config["obs_torus_height"] 54 | 55 | super().__init__( 56 | robot_name="franka", 57 | workspace_dim=3, 58 | config=config) 59 | 60 | def _generate_random_goal(self): 61 | # if goal is given, use the fixed goal 62 | if self.goal is None: 63 | 
current_goal = sample_from_torus_3d( 64 | self.np_random, 65 | self._goal_torus_angle_center, 66 | self._goal_torus_angle_range, 67 | self._goal_torus_major_radius, 68 | self._goal_torus_minor_radius, 69 | self._goal_torus_height) 70 | # otherwise, sample a random goal with the specified parameters 71 | else: 72 | current_goal = self.goal 73 | # generate goal object within pybullet 74 | goal_uid = add_goal(self._p, current_goal) 75 | return current_goal, goal_uid 76 | 77 | def _generate_random_obstacles(self): 78 | current_obs = [] 79 | obs_uids = [] 80 | 81 | # if obstacle config list is given, sample one config from the list 82 | if self.obstacle_cofigs is not None: 83 | config = self.obstacle_cofigs[self.np_random.randint(len(self.obstacle_cofigs))] 84 | for (i, obstacle) in enumerate(config): 85 | obs_uids.append( 86 | add_obstacle_ball(self._p, obstacle['center'], obstacle['radius']) 87 | ) 88 | current_obs.append(np.append(obstacle['center'], obstacle['radius'])) 89 | for i in range(len(config), self.max_obstacle_num): 90 | current_obs.append(np.append(np.zeros(self.workspace_dim), -1.)) 91 | # otherwise, sample random obstacles with the specified parameters 92 | else: 93 | num_obstacles = self.np_random.randint(self.min_obstacle_num, self.max_obstacle_num + 1) 94 | for i in range(self.max_obstacle_num): 95 | if i < num_obstacles: 96 | radius = self.np_random.uniform(low=self.min_obstacle_radius, high=self.max_obstacle_radius) 97 | center = sample_from_torus_3d( 98 | self.np_random, 99 | self._obs_torus_angle_center, 100 | self._obs_torus_angle_range, 101 | self._obs_torus_major_radius, 102 | self._obs_torus_minor_radius, 103 | self._obs_torus_height) 104 | obs_uids.append( 105 | add_obstacle_ball(self._p, center, radius) 106 | ) 107 | current_obs.append(np.append(center, radius)) 108 | else: 109 | current_obs.append(np.append(np.zeros(self.workspace_dim), -1.)) 110 | # generate obstacle objects within pybullet 111 | current_obs = 
np.array(current_obs).flatten() 112 | return current_obs, obs_uids -------------------------------------------------------------------------------- /rmp2/envs/franka_residual_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Gym environment for training residual policies 3 | on top of rmp2 policies for franka robot 4 | """ 5 | 6 | from rmp2.envs.franka_env import FrankaEnv 7 | from rmp2.rmpgraph.robotics import RobotRMPGraph 8 | from rmp2.utils.python_utils import merge_dicts 9 | import tensorflow as tf 10 | import numpy as np 11 | import os 12 | 13 | DEFAULT_CONFIG = { 14 | "dtype": "float32", 15 | "offset": 1e-3, 16 | } 17 | 18 | class FrankaResidualEnv(FrankaEnv): 19 | """ 20 | Gym environment for training residual policies 21 | on top of rmp2 policies for franka robot 22 | """ 23 | def __init__(self, config=None): 24 | if config is not None: 25 | config = merge_dicts(DEFAULT_CONFIG, config) 26 | else: 27 | config = DEFAULT_CONFIG.copy() 28 | 29 | # load rmp configs for the rmp2 policy 30 | config_path = os.path.join( 31 | os.path.dirname(__file__), 32 | '../configs/franka_residual_config.yaml' 33 | ) 34 | 35 | # create rmp2 policy 36 | self.rmp_graph = RobotRMPGraph( 37 | 'franka', 38 | config_path=config_path, 39 | dtype=config['dtype'], 40 | offset=config['offset']) 41 | 42 | self.dtype = config['dtype'] 43 | self._ts_goal = None 44 | self._ts_obs = None 45 | 46 | super().__init__(config=config) 47 | 48 | def _generate_random_goal(self): 49 | # additionally keep a goal tensor for computing rmp2 policy 50 | current_goal, goal_uid = super()._generate_random_goal() 51 | self._ts_goal = tf.convert_to_tensor(np.array([current_goal]), dtype=self.dtype) 52 | return current_goal, goal_uid 53 | 54 | def _generate_random_obstacles(self): 55 | # additionally keep a obstacle tensor for computing rmp2 policy 56 | current_obs, obs_uids = super()._generate_random_obstacles() 57 | self._ts_obs = 
tf.convert_to_tensor(np.array([current_obs]), dtype=self.dtype) 58 | self._ts_obs = tf.reshape(self._ts_obs, (1, -1, self.workspace_dim + 1)) 59 | return current_obs, obs_uids 60 | 61 | def step(self, residual_action): 62 | # compute the rmp2 policy 63 | joint_poses, joint_vels, _ = self._robot.get_observation() 64 | ts_joint_poses = tf.convert_to_tensor([joint_poses], dtype=self.dtype) 65 | ts_joint_vels = tf.convert_to_tensor([joint_vels], dtype=self.dtype) 66 | ts_action = self.rmp_graph(ts_joint_poses, ts_joint_vels, obstacles=self._ts_obs, goal=self._ts_goal) 67 | action = ts_action.numpy() 68 | action = action.flatten() 69 | action = np.clip(action, self._action_space.low, self._action_space.high) 70 | # add the residual policy and rmp2 policy together 71 | action = action + residual_action 72 | return super().step(action) 73 | -------------------------------------------------------------------------------- /rmp2/envs/franka_residual_rmp_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Gym environment for training rmp policies 3 | on top of rmp2 policies for franka robot 4 | """ 5 | 6 | from rmp2.envs.franka_env import FrankaEnv 7 | from rmp2.rmpgraph.robotics import RobotRMPGraph 8 | from rmp2.utils.python_utils import merge_dicts 9 | import tensorflow as tf 10 | import numpy as np 11 | from gym import spaces 12 | import os 13 | 14 | BULLET_LINK_POSE_INDEX = 4 15 | BULLET_LINK_VEL_INDEX = 6 16 | 17 | DEFAULT_CONFIG = { 18 | "residual": True, 19 | "dtype": "float32", 20 | "offset": 1e-3, 21 | } 22 | 23 | class FrankaResidualRMPEnv(FrankaEnv): 24 | """ 25 | Gym environment for training rmp policies 26 | on top of rmp2 policies for franka robot 27 | """ 28 | def __init__(self, config=None): 29 | if config is not None: 30 | config = merge_dicts(DEFAULT_CONFIG, config) 31 | else: 32 | config = DEFAULT_CONFIG.copy() 33 | 34 | # load rmp configs for the rmp2 policy 35 | config_path = os.path.join( 36 | 
os.path.dirname(__file__), 37 | '../configs/franka_residual_rmp_config.yaml' 38 | ) 39 | 40 | # create rmp2 policy 41 | self.rmp_graph = RobotRMPGraph( 42 | 'franka', 43 | config_path=config_path, 44 | dtype=config['dtype'], 45 | offset=config['offset']) 46 | 47 | self.dtype = config['dtype'] 48 | self._ts_goal = None 49 | self._ts_obs = None 50 | 51 | # find all the residual rmps to be learned 52 | self.external_rmp_names = [] 53 | self.external_rmp_links = [] 54 | for key, rmp in zip(self.rmp_graph.keys, self.rmp_graph.rmps): 55 | if key == 'external_rmp': 56 | self.external_rmp_names.append(rmp.feature_name) 57 | self.external_rmp_links.append(rmp.link) 58 | 59 | super().__init__(config=config) 60 | 61 | action_space_dim = 0 62 | for link in self.external_rmp_links: 63 | if link == 'joint': 64 | action_space_dim += self.cspace_dim * (self.cspace_dim + 1) 65 | else: 66 | action_space_dim += self.workspace_dim * (self.workspace_dim + 1) 67 | self.action_space = spaces.Box( 68 | low=-np.inf, high=np.inf, 69 | shape=(action_space_dim,), dtype=self.dtype) 70 | 71 | 72 | def _generate_random_goal(self): 73 | # additionally keep a goal tensor for computing rmp2 policy 74 | current_goal, goal_uid = super()._generate_random_goal() 75 | self._ts_goal = tf.convert_to_tensor([current_goal], dtype=self.dtype) 76 | return current_goal, goal_uid 77 | 78 | def _generate_random_obstacles(self): 79 | # additionally keep a obstacle tensor for computing rmp2 policy 80 | current_obs, obs_uids = super()._generate_random_obstacles() 81 | self._ts_obs = tf.convert_to_tensor([current_obs], dtype=self.dtype) 82 | self._ts_obs = tf.reshape(self._ts_obs, (1, -1, self.workspace_dim + 1)) 83 | return current_obs, obs_uids 84 | 85 | def step(self, external_rmps): 86 | # unpack the flat output of the residual rmps 87 | index = 0 88 | features = {} 89 | for name, link in zip(self.external_rmp_names, self.external_rmp_links): 90 | if link == 'joint': 91 | dim = self.cspace_dim 92 | else: 93 | 
dim = self.workspace_dim 94 | metric_sqrt = external_rmps[index: index + dim ** 2] 95 | metric_sqrt = np.reshape(metric_sqrt, (dim, dim)) 96 | index += dim ** 2 97 | accel = external_rmps[index: index + dim] 98 | index += dim 99 | 100 | ts_metric_sqrt = tf.convert_to_tensor([metric_sqrt], dtype=self.dtype) 101 | ts_accel = tf.convert_to_tensor([accel], dtype=self.dtype) 102 | features[name] = (ts_metric_sqrt, ts_accel) 103 | 104 | # compute the combined rmp2 policy 105 | joint_poses, joint_vels, _ = self._robot.get_observation() 106 | ts_joint_poses = tf.convert_to_tensor([joint_poses], dtype=self.dtype) 107 | ts_joint_vels = tf.convert_to_tensor([joint_vels], dtype=self.dtype) 108 | 109 | ts_action = self.rmp_graph( 110 | ts_joint_poses, ts_joint_vels, 111 | obstacles=self._ts_obs, goal=self._ts_goal, 112 | **features) 113 | action = ts_action.numpy() 114 | action = action.flatten() 115 | 116 | return super().step(action) 117 | 118 | def get_extended_observation(self): 119 | # additionally add the current goal position, 120 | # and link poses & velocities for residual rmps 121 | 122 | link_poses = [] 123 | link_vels = [] 124 | 125 | for link in self.external_rmp_links: 126 | link_state = self._p.getLinkState( 127 | self._robot.robot_uid, self._robot._link_index[link], 128 | computeLinkVelocity=1, 129 | computeForwardKinematics=1) 130 | link_pose = link_state[BULLET_LINK_POSE_INDEX][:self.workspace_dim] 131 | link_vel = link_state[BULLET_LINK_VEL_INDEX][:self.workspace_dim] 132 | link_poses.append(link_pose) 133 | link_vels.append(link_vel) 134 | link_poses = np.array(link_poses).flatten() 135 | link_vels = np.array(link_vels).flatten() 136 | 137 | observation = super().get_extended_observation() 138 | 139 | self._observation = np.concatenate( 140 | (observation, self.current_goal, link_poses, link_vels) 141 | ) 142 | 143 | return self._observation -------------------------------------------------------------------------------- /rmp2/envs/robot_sim.py: 
"""
acceleration-based control for a pybullet robot
"""

import numpy as np

# pybullet macro: indices into the per-joint tuples
# returned by bullet_client.getJointStates()
JOINT_POSE_IDX = 0
JOINT_VEL_IDX = 1
JOINT_TORQUE_IDX = 3

# pybullet macro: indices into the tuple returned by
# bullet_client.getJointInfo()
JOINT_NAME_IDX = 1
JOINT_TYPE_IDX = 2
JOINT_LOWER_LIMIT_IDX = 8
JOINT_UPPER_LIMIT_IDX = 9
JOINT_VEL_LIMIT_IDX = 11
LINK_NAME_IDX = 12

# control modes:

# CLOSED LOOP (NOT RECOMMENDED): Both the actual
# joint angles and actual joint velocities are
# used to compute the reference next-step joint
# angles and velocities for the low-level pd
# controller. This often leads to unstable behavior
# and is hence not recommended
CLOSED_LOOP = 0
# VELOCITY OPEN LOOP (DEFAULT): The actual joint
# angles and virtual joint velocities (computed
# through numerically integrating the accelerations)
# are used to compute the reference joint angles and
# velocities
VEL_OPEN_LOOP = 1
# OPEN LOOP: The virtual joint angles and joint
# velocities (both computed through numerical
# integration) are used to compute the reference
# joint angles and velocities
OPEN_LOOP = 2


class RobotSim(object):
    """
    acceleration-based control for a pybullet robot

    the caller commands joint accelerations; step() integrates them
    (explicit Euler) into reference joint angles/velocities, which are
    fed to pybullet's low-level position controller
    """
    def __init__(self, urdf_path, eef_uid, bullet_client, time_step, mode=VEL_OPEN_LOOP):
        """
        :param urdf_path: path to the robot urdf file
        :param eef_uid: link uid of the end effector
        :param bullet_client: pybullet client
        :param time_step: simulation time between steps
        :param mode: control mode (see module-level macros above)
        """
        self.bullet_client = bullet_client
        self.time_step = time_step
        self.bullet_client.setTimeStep(time_step)

        assert mode in (CLOSED_LOOP, VEL_OPEN_LOOP, OPEN_LOOP)
        self._mode = mode

        flags = self.bullet_client.URDF_ENABLE_CACHED_GRAPHICS_SHAPES
        self.robot_uid = self.bullet_client.loadURDF(urdf_path, useFixedBase=True, flags=flags)

        # joint state caches; populated by reset() / get_observation()
        self.joint_poses = None
        self.joint_vels = None
        self.joint_torques = None

        self.eef_uid = eef_uid

        # movable-joint bookkeeping, filled in by _set_joint_indices()
        self._joint_indices = None
        self._joint_lower_limit = None
        self._joint_upper_limit = None
        self._joint_vel_limit = None

        self._set_joint_indices()
        self.cspace_dim = len(self._joint_indices)

    def reset(self, initial_config, initial_vel):
        """
        reset the robot to initial configuration and velocity
        :param initial_config: initial joint angles (len == cspace_dim)
        :param initial_vel: initial joint velocities (len == cspace_dim)
        """
        initial_config = np.array(initial_config)
        initial_vel = np.array(initial_vel)

        for i, j in enumerate(self._joint_indices):
            self.bullet_client.resetJointState(self.robot_uid, j,
                targetValue=initial_config[i],
                targetVelocity=initial_vel[i])

        self.joint_poses = np.array(initial_config)
        self.joint_vels = np.array(initial_vel)
        self.joint_torques = np.zeros_like(self.joint_poses)

        # reference state for the position controller starts at the
        # reset state so that the first step() is well-defined
        self.target_joint_poses = self.joint_poses
        self.target_joint_vels = self.joint_vels

    def step(self, action):
        """
        apply acceleration-based control to the robot
        :param action: joint accelerations (len == cspace_dim)
        :raises RuntimeError: if called before reset()
        """
        if self.joint_poses is None or self.joint_vels is None:
            # RuntimeError (not bare Exception) so callers can catch precisely
            raise RuntimeError('Error: make sure to call reset() before step!')

        # forward predict using Euler integration
        self.target_joint_poses = self.joint_poses + self.joint_vels * self.time_step
        self.target_joint_vels = self.joint_vels + action * self.time_step

        # clip to respect joint limits and joint velocity limits;
        # any joint pinned exactly at a position limit gets zero
        # reference velocity so the controller does not push into it
        self.target_joint_poses = np.clip(self.target_joint_poses, self._joint_lower_limit, self._joint_upper_limit)
        self.target_joint_vels[self.target_joint_poses == self._joint_lower_limit] = 0.
        self.target_joint_vels[self.target_joint_poses == self._joint_upper_limit] = 0.
        self.target_joint_vels = np.clip(self.target_joint_vels, -self._joint_vel_limit, self._joint_vel_limit)

        self.bullet_client.setJointMotorControlArray(
            self.robot_uid, self._joint_indices,
            self.bullet_client.POSITION_CONTROL,
            targetPositions=self.target_joint_poses, targetVelocities=self.target_joint_vels)

    def get_observation(self):
        """
        joint angles and velocities of the robot.
        for CLOSED_LOOP (NOT RECOMMENDED): both joint
            angles and velocities are given by the
            pybullet simulator
        for VEL_OPEN_LOOP: joint angles are given by the
            pybullet simulator, yet the joint velocities
            are given by numerical integration
        for OPEN_LOOP: both joint angles and velocities
            are given by numerical integration
        :return: (joint_poses, joint_vels, joint_torques) copies
        """
        full_joint_states = self.bullet_client.getJointStates(
            self.robot_uid, self._joint_indices)
        if self._mode == OPEN_LOOP:
            self.joint_poses = self.target_joint_poses
            self.joint_vels = self.target_joint_vels
        elif self._mode == VEL_OPEN_LOOP:
            self.joint_poses = np.array([joint_state[JOINT_POSE_IDX] for joint_state in full_joint_states])
            self.joint_vels = self.target_joint_vels
        elif self._mode == CLOSED_LOOP:
            self.joint_poses = np.array([joint_state[JOINT_POSE_IDX] for joint_state in full_joint_states])
            self.joint_vels = np.array([joint_state[JOINT_VEL_IDX] for joint_state in full_joint_states])
        # torques are always read from the simulator
        self.joint_torques = np.array([joint_state[JOINT_TORQUE_IDX] for joint_state in full_joint_states])
        return self.joint_poses.copy(), self.joint_vels.copy(), self.joint_torques.copy()

    def _set_joint_indices(self):
        """
        collect the movable (prismatic/revolute) joints, their limits,
        and a name -> index map for all links
        """
        self._joint_indices = []
        self._joint_lower_limit = []
        self._joint_upper_limit = []
        self._joint_vel_limit = []
        self._link_index = {}

        for j in range(self.bullet_client.getNumJoints(self.robot_uid)):
            info = self.bullet_client.getJointInfo(self.robot_uid, j)
            joint_type = info[JOINT_TYPE_IDX]
            joint_lower_limit = info[JOINT_LOWER_LIMIT_IDX]
            joint_upper_limit = info[JOINT_UPPER_LIMIT_IDX]
            joint_vel_limit = info[JOINT_VEL_LIMIT_IDX]
            link_name = info[LINK_NAME_IDX]

            self._link_index[link_name.decode("utf-8")] = j

            if (joint_type == self.bullet_client.JOINT_PRISMATIC or joint_type == self.bullet_client.JOINT_REVOLUTE):
                self._joint_indices.append(j)
                self._joint_lower_limit.append(joint_lower_limit)
                self._joint_upper_limit.append(joint_upper_limit)
                self._joint_vel_limit.append(joint_vel_limit)

        self._joint_lower_limit = np.array(self._joint_lower_limit)
        self._joint_upper_limit = np.array(self._joint_upper_limit)
        self._joint_vel_limit = np.array(self._joint_vel_limit)

        # urdf convention: lower > upper means "no position limit";
        # vel limit <= 0 means "no velocity limit"
        indices = self._joint_lower_limit > self._joint_upper_limit
        self._joint_lower_limit[indices] = -np.inf
        self._joint_upper_limit[indices] = np.inf
        indices = self._joint_vel_limit <= 0
        self._joint_vel_limit[indices] = np.inf


def create_robot_sim(robot_name, bullet_client, time_step, mode=VEL_OPEN_LOOP):
    """
    create an acceleration-based control robot given name
    :param robot_name: robot name, 3link or franka
    :param bullet_client: pybullet client
    :param time_step: simulation time between steps
    :param mode: control mode (see macros)
    :return robot_sim: RobotSim object for
        acceleration-based control of the robot
    """
    # local import keeps this module importable without the full rmp2 package
    from rmp2.utils.robot_config_utils import get_robot_urdf_path, get_robot_eef_uid
    urdf_path = get_robot_urdf_path(robot_name)
    eef_uid = get_robot_eef_uid(robot_name)
    robot_sim = RobotSim(urdf_path, eef_uid, bullet_client=bullet_client, time_step=time_step, mode=mode)
    return robot_sim
"""
Base gym environment for 3-link robot
"""

from rmp2.envs.robot_env import RobotEnv
from rmp2.utils.np_utils import sample_from_torus_2d
from rmp2.utils.python_utils import merge_dicts
from rmp2.utils.bullet_utils import add_goal, add_obstacle_cylinder
import numpy as np

# default environment parameters; any entry can be overridden via the
# `config` dict passed to ThreeLinkEnv
DEFAULT_CONFIG = {
    "workspace_radius": 0.75,
    # parameters for randomly generated goals
    # (sampled from a 2d torus, i.e. an annular sector)
    "goal_torus_angle_center": 0.,
    "goal_torus_angle_range": 2 * np.pi,
    "goal_torus_major_radius": 0.375,
    "goal_torus_minor_radius": 0.375,
    # parameters for randomly generated obstacles
    "obs_torus_angle_center": 0.,
    "obs_torus_angle_range": 2 * np.pi,
    "obs_torus_major_radius": 0.65,
    "obs_torus_minor_radius": 0.25,
    # obstacle size
    "max_obstacle_radius": 0.1,
    "min_obstacle_radius": 0.05,
    # camera parameters for rendering (top-down view)
    "cam_dist": 1.5,
    "cam_yaw": 0,
    "cam_pitch": -85,
    "cam_position": [0, 0, 0],
}

class ThreeLinkEnv(RobotEnv):
    """
    Base gym environment for 3-link robot
    """
    def __init__(self, config=None):
        # merge user overrides into the defaults
        if config is not None:
            config = merge_dicts(DEFAULT_CONFIG, config)
        else:
            config = DEFAULT_CONFIG.copy()

        # random goal config
        self._goal_torus_angle_center = config["goal_torus_angle_center"]
        self._goal_torus_angle_range = config["goal_torus_angle_range"]
        self._goal_torus_major_radius = config["goal_torus_major_radius"]
        self._goal_torus_minor_radius = config["goal_torus_minor_radius"]
        # random obstacle config
        self._obs_torus_angle_center = config["obs_torus_angle_center"]
        self._obs_torus_angle_range = config["obs_torus_angle_range"]
        self._obs_torus_major_radius = config["obs_torus_major_radius"]
        self._obs_torus_minor_radius = config["obs_torus_minor_radius"]

        super().__init__(
            robot_name="3link",
            workspace_dim=2,
            config=config)

    def _generate_random_goal(self):
        """
        sample (or reuse) a goal and spawn its marker in pybullet
        :return: (goal position in the workspace, pybullet uid of the marker)
        """
        # if no fixed goal is given, sample a random goal
        # with the specified torus parameters
        if self.goal is None:
            current_goal = sample_from_torus_2d(
                self.np_random,
                self._goal_torus_angle_center,
                self._goal_torus_angle_range,
                self._goal_torus_major_radius,
                self._goal_torus_minor_radius)
        # otherwise, use the fixed goal
        else:
            current_goal = self.goal
        # generate goal object within pybullet (lifted to z = 0.25)
        goal_uid = add_goal(self._p, np.append(current_goal, 0.25))
        return current_goal, goal_uid

    def _generate_random_obstacles(self):
        """
        sample obstacles and spawn them in pybullet; unused obstacle slots
        (up to max_obstacle_num) are encoded as (0, ..., 0, -1)
        :return: (flat array of per-obstacle [center, radius], pybullet uids)
        """
        current_obs = []
        obs_uids = []

        # if obstacle config list is given, sample one config from the list
        # NOTE: `obstacle_cofigs` (sic) is the attribute name defined by RobotEnv
        if self.obstacle_cofigs is not None:
            config = self.obstacle_cofigs[self.np_random.randint(len(self.obstacle_cofigs))]
            for (i, obstacle) in enumerate(config):
                obs_uids.append(
                    add_obstacle_cylinder(
                        self._p,
                        np.append(obstacle['center'], 0.25),
                        obstacle['radius'], 0.5)
                )
                current_obs.append(np.append(obstacle['center'], obstacle['radius']))
            # pad the remaining slots with the "no obstacle" encoding
            for i in range(len(config), self.max_obstacle_num):
                current_obs.append(np.append(np.zeros(self.workspace_dim), -1.))
        # otherwise, sample random obstacles with the specified parameters
        else:
            num_obstacles = self.np_random.randint(self.min_obstacle_num, self.max_obstacle_num + 1)
            for i in range(self.max_obstacle_num):
                if i < num_obstacles:
                    radius = self.np_random.uniform(low=self.min_obstacle_radius, high=self.max_obstacle_radius)
                    center = sample_from_torus_2d(
                        self.np_random,
                        self._obs_torus_angle_center,
                        self._obs_torus_angle_range,
                        self._obs_torus_major_radius,
                        self._obs_torus_minor_radius)
                    obs_uids.append(
                        add_obstacle_cylinder(self._p, np.append(center, 0.25), radius, 0.5)
                    )
                    current_obs.append(np.append(center, radius))
                else:
                    # pad with the "no obstacle" encoding
                    current_obs.append(np.append(np.zeros(self.workspace_dim), -1.))
        # flatten to a single observation vector
        current_obs = np.array(current_obs).flatten()
        return current_obs, obs_uids
"""
Gym environment for training residual policies
on top of rmp2 policies for 3-link robot
"""

from rmp2.envs.three_link_env import ThreeLinkEnv
from rmp2.rmpgraph.robotics import RobotRMPGraph
from rmp2.utils.python_utils import merge_dicts
import tensorflow as tf
import numpy as np
import os

DEFAULT_CONFIG = {
    "dtype": "float32",
    "offset": 1e-3,
}

class ThreeLinkResidualEnv(ThreeLinkEnv):
    """
    Gym environment for training residual policies
    on top of rmp2 policies for 3-link robot
    """
    def __init__(self, config=None):
        # merge user overrides into the defaults
        config = DEFAULT_CONFIG.copy() if config is None else merge_dicts(DEFAULT_CONFIG, config)

        # rmp configuration file for the underlying rmp2 policy
        config_path = os.path.join(
            os.path.dirname(__file__),
            '../configs/3link_residual_config.yaml'
        )

        # build the rmp2 policy graph
        self.rmp_graph = RobotRMPGraph(
            '3link',
            config_path=config_path,
            workspace_dim=2,
            dtype=config['dtype'],
            offset=config['offset'])

        self.dtype = config['dtype']
        # tensor caches of the current goal/obstacles for the rmp2 graph
        self._ts_goal = None
        self._ts_obs = None

        super().__init__(config=config)

    def _generate_random_goal(self):
        # cache the sampled goal as a batched tensor for the rmp2 policy
        goal, goal_uid = super()._generate_random_goal()
        self._ts_goal = tf.convert_to_tensor(np.array([goal]), dtype=self.dtype)
        return goal, goal_uid

    def _generate_random_obstacles(self):
        # cache the obstacles as a (1, n_obs, workspace_dim + 1) tensor
        obstacles, obs_uids = super()._generate_random_obstacles()
        obs_tensor = tf.convert_to_tensor(np.array([obstacles]), dtype=self.dtype)
        self._ts_obs = tf.reshape(obs_tensor, (1, -1, self.workspace_dim + 1))
        return obstacles, obs_uids

    def step(self, residual_action):
        # evaluate the rmp2 policy at the current joint state
        poses, vels, _ = self._robot.get_observation()
        ts_poses = tf.convert_to_tensor([poses], dtype=self.dtype)
        ts_vels = tf.convert_to_tensor([vels], dtype=self.dtype)
        ts_action = self.rmp_graph(ts_poses, ts_vels, obstacles=self._ts_obs, goal=self._ts_goal)
        base_action = ts_action.numpy().flatten()
        base_action = np.clip(base_action, self._action_space.low, self._action_space.high)
        # the learned residual is applied on top of the rmp2 action
        return super().step(base_action + residual_action)
    def __init__(self, config=None):
        """
        :param config: dict of overrides merged into DEFAULT_CONFIG, or None
        """
        if config is not None:
            config = merge_dicts(DEFAULT_CONFIG, config)
        else:
            config = DEFAULT_CONFIG.copy()

        # load rmp configs for the rmp2 policy
        config_path = os.path.join(
            os.path.dirname(__file__),
            '../configs/3link_residual_rmp_config.yaml'
        )

        # create rmp2 policy
        self.rmp_graph = RobotRMPGraph(
            '3link',
            config_path=config_path,
            workspace_dim=2,
            dtype=config['dtype'],
            offset=config['offset'])

        self.dtype = config['dtype']
        # tensor caches of the current goal/obstacles for the rmp2 graph
        self._ts_goal = None
        self._ts_obs = None

        # find all the residual rmps to be learned; graph nodes keyed
        # 'external_rmp' are supplied by the policy at every step
        self.external_rmp_names = []
        self.external_rmp_links = []
        for key, rmp in zip(self.rmp_graph.keys, self.rmp_graph.rmps):
            if key == 'external_rmp':
                self.external_rmp_names.append(rmp.feature_name)
                self.external_rmp_links.append(rmp.link)

        super().__init__(config=config)

        # each external rmp contributes a flattened (dim x dim) sqrt-metric
        # plus a dim-vector acceleration, i.e. dim * (dim + 1) numbers;
        # rmps attached to the special link 'joint' act in configuration
        # space, all others in the workspace
        action_space_dim = 0
        for link in self.external_rmp_links:
            if link == 'joint':
                action_space_dim += self.cspace_dim * (self.cspace_dim + 1)
            else:
                action_space_dim += self.workspace_dim * (self.workspace_dim + 1)
        self.action_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(action_space_dim,), dtype=self.dtype)


    def _generate_random_goal(self):
        # additionally keep a goal tensor for computing rmp2 policy
        current_goal, goal_uid = super()._generate_random_goal()
        self._ts_goal = tf.convert_to_tensor([current_goal], dtype=self.dtype)
        return current_goal, goal_uid

    def _generate_random_obstacles(self):
        # additionally keep a obstacle tensor for computing rmp2 policy,
        # reshaped to (1, n_obs, workspace_dim + 1)
        current_obs, obs_uids = super()._generate_random_obstacles()
        self._ts_obs = tf.convert_to_tensor([current_obs], dtype=self.dtype)
        self._ts_obs = tf.reshape(self._ts_obs, (1, -1, self.workspace_dim + 1))
        return current_obs, obs_uids

    def step(self, external_rmps):
        """
        unpack the flat action into one (metric_sqrt, accel) pair per
        external rmp, feed them into the rmp2 graph, and step the robot
        with the resolved joint accelerations
        :param external_rmps: flat array, ordered as in external_rmp_names,
            each segment laid out as dim*dim sqrt-metric entries followed
            by dim acceleration entries
        """
        index = 0
        features = {}
        for name, link in zip(self.external_rmp_names, self.external_rmp_links):
            # 'joint' rmps live in configuration space, others in workspace
            if link == 'joint':
                dim = self.cspace_dim
            else:
                dim = self.workspace_dim
            metric_sqrt = external_rmps[index: index + dim ** 2]
            metric_sqrt = np.reshape(metric_sqrt, (dim, dim))
            index += dim ** 2
            accel = external_rmps[index: index + dim]
            index += dim

            ts_metric_sqrt = tf.convert_to_tensor([metric_sqrt], dtype=self.dtype)
            ts_accel = tf.convert_to_tensor([accel], dtype=self.dtype)
            features[name] = (ts_metric_sqrt, ts_accel)

        # compute the combined rmp2 policy
        joint_poses, joint_vels, _ = self._robot.get_observation()
        ts_joint_poses = tf.convert_to_tensor([joint_poses], dtype=self.dtype)
        ts_joint_vels = tf.convert_to_tensor([joint_vels], dtype=self.dtype)

        ts_action = self.rmp_graph(
            ts_joint_poses, ts_joint_vels,
            obstacles=self._ts_obs, goal=self._ts_goal,
            **features)
        action = ts_action.numpy()
        action = action.flatten()

        return super().step(action)

    def get_extended_observation(self):
        """
        additionally add the current goal position,
        and link poses & velocities for residual rmps
        """
        link_poses = []
        link_vels = []

        for link in self.external_rmp_links:
            link_state = self._p.getLinkState(
                self._robot.robot_uid, self._robot._link_index[link],
                computeLinkVelocity=1,
                computeForwardKinematics=1)
            # NOTE(review): indices 4/6 taken as world link frame position
            # and world linear velocity -- confirm against the pybullet
            # getLinkState docs for the installed version
            link_pose = link_state[BULLET_LINK_POSE_INDEX][:self.workspace_dim]
            link_vel = link_state[BULLET_LINK_VEL_INDEX][:self.workspace_dim]
            link_poses.append(link_pose)
            link_vels.append(link_vel)
        link_poses = np.array(link_poses).flatten()
        link_vels = np.array(link_vels).flatten()

        observation = super().get_extended_observation()

        self._observation = np.concatenate(
            (observation, self.current_goal, link_poses, link_vels)
        )

        return self._observation
class Joint:
    """
    tensorflow module for joint: holds the constant transform data of a
    single urdf joint and builds its homogeneous transformation
    """
    def __init__(self, urdf_joint, dtype=tf.float32):
        """
        :param urdf_joint: joint class from urdf_parser_py
        :param dtype: data type
        """
        self.name = urdf_joint.name
        self.parent = urdf_joint.parent
        self.child = urdf_joint.child
        self.type = urdf_joint.type

        # NOTE(review): joints without a <limit> tag leave these attributes
        # unset; downstream code reading them for a non-fixed joint would
        # raise AttributeError -- assumes all movable joints in the
        # supported urdfs declare limits, confirm for new robots
        if urdf_joint.limit is not None:
            self.lower_limit = urdf_joint.limit.lower
            self.upper_limit = urdf_joint.limit.upper
            self.velocity_limit = urdf_joint.limit.velocity

        # default rotation/translation axis is x; otherwise normalize
        # the axis given in the urdf
        if urdf_joint.axis is None:
            self.axis = tf.constant([1., 0., 0.], dtype=dtype)
        else:
            axis = tf.convert_to_tensor(urdf_joint.axis, dtype=dtype)
            axis = (1. / tf.linalg.norm(axis)) * axis
            self.axis = axis

        # fixed transform from the parent link frame to the joint frame
        self.xyz = tf.constant([0., 0., 0.], dtype=dtype)
        self.rpy = tf.constant([0., 0., 0.], dtype=dtype)
        if urdf_joint.origin is not None:
            if urdf_joint.origin.xyz is not None:
                self.xyz = tf.convert_to_tensor(urdf_joint.origin.xyz, dtype=dtype)
            if urdf_joint.origin.rpy is not None:
                self.rpy = tf.convert_to_tensor(urdf_joint.origin.rpy, dtype=dtype)

    def transformation(self, q=None):
        """
        homogeneous transformation matrices for the joint
        :param q: joint configuration (must be None iff the joint is fixed)
        :return frame: homogenous transformation matrices
        :raises ValueError: if the joint type is not supported
        """
        if self.type == "fixed":
            assert q is None
            # todo: batch_size is not given here!
            return T_rpy(self.xyz, self.rpy)
        assert q is not None
        if self.type == "prismatic":
            return T_prismatic(self.xyz, self.rpy, self.axis, q)
        if self.type in ["revolute", "continuous"]:
            return T_revolute(self.xyz, self.rpy, self.axis, q)
        # previously this fell through and raised UnboundLocalError on
        # unsupported types (planar, floating); fail loudly instead
        raise ValueError("unsupported joint type: %s" % self.type)
class Robot:
    """
    tensorflow module for robot forward kinematics
    """
    def __init__(self, urdf_path, link_set=None, workspace_dim=3, dtype=tf.float32):
        """
        :param urdf_path: path to urdf file
        :param link_set: list of links to compute fk
        :param workspace_dim: workspace dimension
        :param dtype: data type
        """
        self.workspace_dim = workspace_dim
        self.dtype = dtype

        urdf_robot = URDF.from_xml_file(file_path=urdf_path)
        self.parent_map = urdf_robot.parent_map
        self.child_map = urdf_robot.child_map

        # root of the kinematic tree
        self.base_link = urdf_robot.get_root()
        if link_set is not None:
            self.link_set = list(link_set)
        else:
            self.link_set = list(urdf_robot.link_map.keys())

        # only take the joints that are required to compute fk for links in link_set
        self.joint_map = {}
        for link in self.link_set:
            chain = urdf_robot.get_chain(self.base_link, link, links=False)
            for joint in chain:
                if not joint in self.joint_map:
                    self.joint_map[joint] = Joint(urdf_robot.joint_map[joint], dtype=dtype)
        # topologically sort the joints
        self.set_joint_names()

        # NOTE(review): assumes every non-fixed joint declares <limit>
        # values; a joint without them would make these lookups raise
        # AttributeError -- confirm for new urdfs
        self.joint_lower_limits = tf.constant([self.joint_map[joint].lower_limit for joint in self.non_fixed_joint_names], dtype=self.dtype)
        self.joint_upper_limits = tf.constant([self.joint_map[joint].upper_limit for joint in self.non_fixed_joint_names], dtype=self.dtype)
        self.joint_velocity_limits = tf.constant([self.joint_map[joint].velocity_limit for joint in self.non_fixed_joint_names], dtype=self.dtype)


    def forward_kinematics(self, q):
        """
        compute the forward kinematics
        ---------------------------
        :param q: configuration space coordinate, shape (cspace_dim,) or
            (batch_size, cspace_dim); entries must be ordered as in
            non_fixed_joint_names (topological order)
        :return x: dictionary of task space coordinates, one entry per
            link in link_set, each of shape (batch_size, workspace_dim)
        """
        if len(q.shape) == 1:
            q = tf.expand_dims(q, 0)
        batch_size = q.shape[0]

        frames = {}
        # root frame is the identity
        frame = tf.eye(4, batch_shape=(batch_size,), dtype=self.dtype)
        frames[self.base_link] = frame
        # recursively compute fk
        self._forward_kinematics(self.base_link, q, frames)

        # take the coordinates corresponding to the links in link_set
        # (translation part of each homogeneous transform, truncated to
        # the workspace dimension)
        x = {}
        for link in self.link_set:
            x[link] = frames[link][:, 0:self.workspace_dim, -1]
        return x

    def _forward_kinematics(self, base_link, q, frames):
        """
        recursively compute forward kinematics
        ------------------------
        :param base_link: the name of the current base link
        :param q: joint coordinate from base_link upwards
        :param frames: homogenous transformation from configuration space to base_link space
        """
        assert base_link in frames

        # base_link is a leaf in the kinematic tree
        if base_link not in self.child_map:
            return

        # compute fk for the child links
        children_list = self.child_map[base_link]
        for child_joint_name, child_link in children_list:
            # skip joints not needed for the requested link_set
            if child_joint_name in self.joint_map:
                child_joint = self.joint_map[child_joint_name]
                if child_joint.type == 'fixed':
                    # fixed joints consume no entry of q
                    frames[child_link] = tf.matmul(frames[base_link], child_joint.transformation())
                    self._forward_kinematics(child_link, q, frames)
                else:
                    # actuated joint: consume the next entry of q
                    frames[child_link] = tf.matmul(frames[base_link], child_joint.transformation(q[:, 0]))
                    q = q[:, 1:]
                    # NOTE(review): q entries consumed inside this child's
                    # subtree are not skipped for subsequent siblings; this
                    # is fine for serial chains but looks suspect for trees
                    # with actuated joints on multiple branches -- confirm
                    self._forward_kinematics(child_link, q, frames)


    def set_joint_names(self):
        """
        topologically sort the joint names
        """
        joint_names = []
        non_fixed_joint_names = []
        self._sort_joint_names(self.base_link, joint_names, non_fixed_joint_names)
        self.joint_names = joint_names
        self.non_fixed_joint_names = non_fixed_joint_names


    def _sort_joint_names(self, base_link, joint_names, non_fixed_joint_names):
        """
        sort the joint names recursively (depth-first, so the resulting
        order matches the q-consumption order of _forward_kinematics)
        """
        # base_link is a leaf in the kinematic tree
        if base_link not in self.child_map:
            return

        children_list = self.child_map[base_link]
        for child_joint_name, child_link in children_list:
            if child_joint_name in self.joint_map:
                child_joint = self.joint_map[child_joint_name]
                joint_names.append(child_joint_name)
                if child_joint.type != 'fixed':
                    non_fixed_joint_names.append(child_joint_name)
                self._sort_joint_names(child_link, joint_names, non_fixed_joint_names)

    @property
    def num_joints(self):
        # number of actuated (non-fixed) joints
        return len(self.non_fixed_joint_names)

    @property
    def cspace_dim(self):
        # configuration space dimension == number of actuated joints
        return len(self.non_fixed_joint_names)
"""
customized rllib tf2 model for
gaussian policies
"""

from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.utils.framework import try_import_tf
from rmp2.policies.policy_networks import get_policy_network
from rmp2.utils.env_wrappers import load_env_wrapper
import numpy as np

tf1, tf, tfv = try_import_tf()

class DiagGaussianModel(TFModelV2):
    """Custom model for policy gradient algorithms.

    Outputs the concatenation [mean, log_std] of a diagonal gaussian;
    the mean can optionally be squashed (sigmoid) or clipped into the
    action space bounds."""

    def __init__(self, obs_space, action_space, num_outputs, model_config, name,
                 policy_config, value_config, env_wrapper, init_lstd=-1., min_std=1e-12, clip_mean=None, dtype=tf.float32):
        """
        :param policy_config: config dict for the policy network
        :param value_config: config dict for the value network
        :param env_wrapper: env wrapper instance, or its name (str)
        :param init_lstd: initial log-std (broadcast to action_dim)
        :param min_std: lower bound on the std
        :param clip_mean: None, "squash" (sigmoid) or "clip"
        (num_outputs is not used to size the networks)
        """
        super().__init__(obs_space, action_space,
                         num_outputs, model_config, name)
        self.num_outputs = num_outputs

        obs_dim = obs_space.shape[0]
        action_dim = action_space.shape[0]

        # NOTE(review): .any() marks the space "bounded" if ANY dimension
        # is bounded on both sides -- confirm .all() is not intended
        self.bounded = np.logical_and(action_space.bounded_above,
                                      action_space.bounded_below).any()

        self.clip_mean = clip_mean
        if self.clip_mean == "squash":
            self.action_range = tf.constant((action_space.high - action_space.low)[None])
            self.low_action = tf.constant(action_space.low[None])
        elif self.clip_mean == "clip":
            self.action_space_high = tf.constant(action_space.high[None])
            self.action_space_low = tf.constant(action_space.low[None])

        if type(env_wrapper) == str:
            env_wrapper = load_env_wrapper(env_wrapper, dtype=dtype)
        self.env_wrapper = env_wrapper

        # probe the wrapper with a dummy observation to size the networks
        _dummy_obs = tf.zeros((1, obs_dim), dtype=dtype)
        _dummy_policy_input = self.env_wrapper.obs_to_policy_input(_dummy_obs)
        _dummy_value_input = self.env_wrapper.obs_to_value_input(_dummy_obs)
        if type(_dummy_policy_input) == dict:
            policy_input_dim = None
        else:
            policy_input_dim = int(_dummy_policy_input.shape[1])
        value_input_dim = int(_dummy_value_input.shape[1])

        self.policy_net = get_policy_network(policy_input_dim, action_dim, policy_config, name=name+'_policy', dtype=dtype)  # todo: initialize policy network
        self.value_net = get_policy_network(value_input_dim, 1, value_config, name=name+'_value', dtype=dtype)  # todo: initialize value network

        # trainable state-independent log-std, one entry per action dim
        init_lstd = np.broadcast_to(init_lstd, action_dim)
        self._lstd = tf.Variable(init_lstd, dtype=dtype)

        self._min_lstd = tf.constant(np.log(min_std), dtype=dtype)

        variables = self.policy_net.variables + self.value_net.variables + (self._lstd,)
        self.register_variables(variables)

    def _policy(self, input_dict):
        # observation -> policy-network input -> action mean
        policy_input = self.env_wrapper.obs_to_policy_input(input_dict["obs"])
        policy_output = self.policy_net(policy_input)
        action = self.env_wrapper.policy_output_to_action(policy_output)
        return action

    def _value(self, input_dict):
        # observation -> value-network input -> state value
        value_input = self.env_wrapper.obs_to_value_input(input_dict["obs"])
        value = self.value_net(value_input)
        return value

    def forward(self, input_dict, state, seq_lens):
        x = self._policy(input_dict)
        # bug fix: this used to be two separate `if` statements, so the
        # trailing `else: mean = x` of the second one unconditionally
        # overwrote the squashed mean (since "squash" != "clip")
        if self.bounded and self.clip_mean == "squash":
            # smoothly squash the mean into [low, high]
            sigmoid_out = tf.nn.sigmoid(2 * x)
            mean = self.action_range * sigmoid_out + self.low_action
        elif self.clip_mean == "clip":
            # hard-clip the mean into [low, high]
            mean = tf.clip_by_value(x, self.action_space_low, self.action_space_high)
        else:
            mean = x
        # state-independent log-std broadcast to the mean's shape
        lstd = tf.zeros_like(mean) + self.lstd
        model_out = tf.concat([mean, lstd], axis=1)

        self._value_out = self._value(input_dict)
        assert model_out.shape[1] == self.num_outputs
        return model_out, state

    def value_function(self):
        return tf.reshape(self._value_out, [-1])

    @property
    def lstd(self):
        # log-std bounded below by log(min_std)
        return tf.maximum(self._lstd, self._min_lstd)
"""
helper function for creating policy
networks
"""

from rmp2.utils.tf_utils import MLP
import tensorflow as tf
import numpy as np

def get_policy_network(x_shape, y_shape, config, dtype, name='policy_network'):
    """
    build a policy/value network from a config dict
    :param x_shape: input dimension (may be None for dict-style inputs)
    :param y_shape: output dimension
    :param config: dict with a 'model' key ('mlp' or 'flat_rmp'); the
        remaining entries are forwarded as keyword arguments
    :param dtype: data type
    :param name: module name
    :return network: the constructed network
    :raises ValueError: if config['model'] is not recognized
    """
    # work on a copy: the previous implementation mutated the caller's
    # dict via `del config['model']`, corrupting configs that are reused
    config = dict(config)
    model = config.pop('model')
    if model == 'mlp':
        network = MLP(x_shape, y_shape, name=name, dtype=dtype, **config)
    elif model == 'flat_rmp':
        network = FlatRMPPolicy(
            x_shape, y_shape,
            rmp_config=config,
            dtype=dtype, name=name)
    else:
        raise ValueError('unknown model type: %s' % model)
    return network


class FlatRMPCanonical(tf.Module):
    """
    MLP mapping the (state, features) vector to a flat rmp: a flattened
    (x_shape x x_shape) sqrt-metric followed by an x_shape acceleration,
    each squashed by tanh into configurable limits
    """
    def __init__(self, x_shape, feature_shape=0, feature_keys=None, units=(), activation='tanh',
                 max_metric_value=10., max_accel_value=3.,
                 hidden_layer_init_scale=2.0, output_layer_init_scale=0.1, init_distribution='uniform',
                 dtype=tf.float32, name='flat_rmp'):

        super().__init__(name=name)

        if feature_keys is None:
            self.feature_keys = []
        else:
            self.feature_keys = feature_keys
        self.dtype = dtype

        # bug fix: init_distribution was previously hard-coded to
        # 'uniform', silently ignoring the constructor argument
        self.net = MLP(
            x_shape * 2 + feature_shape, x_shape ** 2 + x_shape, units, activation=activation,
            hidden_layer_init_scale=hidden_layer_init_scale, output_layer_init_scale=output_layer_init_scale,
            init_distribution=init_distribution,
            dtype=dtype, name=name+'_mlp')

        # per-output saturation limits: sqrt-metric entries are bounded by
        # sqrt(max_metric_value), acceleration entries by max_accel_value
        rmp_limit = np.array([[np.sqrt(max_metric_value)] * x_shape ** 2 + [max_accel_value] * x_shape])
        self.rmp_limit = tf.convert_to_tensor(rmp_limit, dtype=dtype)

    def __call__(self, x, xd, **features):
        """
        :param x: task/configuration space coordinate (batched)
        :param xd: generalized velocity (batched)
        :param features: extra named inputs; only feature_keys are used,
            in the declared order
        :return flat_rmp: tanh-limited flat rmp output
        """
        features = {key: features[key] for key in self.feature_keys}

        state = tf.concat((x, xd) + tuple(features.values()), axis=1)
        flat_rmp = self.net(state)
        flat_rmp = tf.nn.tanh(flat_rmp) * self.rmp_limit
        return flat_rmp


class FlatRMPPolicy(tf.Module):
    """
    thin wrapper exposing FlatRMPCanonical under the network interface
    expected by get_policy_network (dict input unpacked into kwargs)
    """
    def __init__(self, x_shape, y_shape, rmp_config, dtype='float32', name='flat_rmpnet', **kwargs):
        super().__init__(name=name)

        self.x_shape = x_shape
        self.y_shape = y_shape

        self.rmp = FlatRMPCanonical(dtype=dtype, **rmp_config)

    def __call__(self, x):
        # x is a dict carrying the canonical rmp inputs (x, xd, features)
        actions = self.rmp(**x)
        return actions
goals, obstacles, etc. 42 | :return metrics: list of leaf node metrics 43 | :return forces/accelerations: list of leaf node forces/accelerations 44 | """ 45 | metrics = [] 46 | accelerations = [] 47 | 48 | # loop over leaf nodes and evaulate rmp at each leaf node 49 | for (x, xd, rmp) in zip(xs, xds, self.rmps): 50 | metric, acceleration = rmp(x, xd, rmp_type=self.rmp_type, **features) 51 | metrics.append(metric) 52 | accelerations.append(acceleration) 53 | 54 | if not tf.math.reduce_all(tf.math.is_finite(metric)): 55 | tf.print('!!!nan or inf in leaf metric!!!', rmp.name) 56 | if not tf.math.reduce_all(tf.math.is_finite(acceleration)): 57 | tf.print('!!!nan or inf in leaf force!!!!', rmp.name) 58 | 59 | return metrics, accelerations 60 | 61 | 62 | def solve(self, q, qd, method='rmp2', mode='value', **features): 63 | if method == 'rmp2': 64 | return self.solve_rmp2(q, qd, mode=mode, **features) 65 | elif method == 'direct': 66 | return self.solve_direct(q, qd, mode=mode, **features) 67 | else: 68 | raise ValueError 69 | 70 | @tf.function 71 | def solve_rmp2(self, q, qd, mode='value', **features): 72 | tf.compat.v1.enable_v2_behavior() 73 | print('------------buiding graph--------------') 74 | 75 | with timing(' forward pass', self.timed): 76 | # compute the generalized coordinates, velocities, and curvatures 77 | # at the leaf nodes 78 | 79 | with tf.GradientTape(watch_accessed_variables=False) as gg: 80 | gg.watch(q) 81 | with tf.GradientTape(watch_accessed_variables=False) as ggg: 82 | ggg.watch(q) 83 | # generalized coordinates through forward mapping 84 | xs = self.forward_mapping(q, **features) 85 | dummy_ones = [tf.ones_like(x) for x in xs] 86 | with tf.GradientTape(persistent=True, watch_accessed_variables=False) as g: 87 | g.watch(dummy_ones) 88 | sum_x = ip(xs, dummy_ones) 89 | sum_xd = ip(qd, [gradient(ggg, sum_x, q)]) 90 | grd = gradient(gg, sum_xd, q) 91 | sum_crv = ip(qd, grd) 92 | 93 | # curvature terms 94 | xds = gradient(g, sum_xd, dummy_ones) 95 | 
crvs = gradient(g, sum_crv, dummy_ones) 96 | 97 | for crv, rmp in zip(crvs, self.rmps): 98 | if not tf.math.reduce_all(tf.math.is_finite(crv)): 99 | tf.print('!!!nan or inf in curvature!!!', rmp.name) 100 | 101 | # evaluate the rmps at the leaf nodes 102 | with timing(' rmp evaluation', self.timed): 103 | if self.rmp_type == 'canonical': 104 | mtr_leafs, acc_leafs = self.rmp_evals(xs, xds, **features) 105 | elif self.rmp_type == 'natural': 106 | mtr_leafs, fce_leafs = self.rmp_evals(xs, xds, **features) 107 | else: 108 | raise ValueError 109 | 110 | # compute pullback metric and rhs 111 | with timing(' backward pass', self.timed): 112 | dummy_q = q + 0. 113 | 114 | with tf.GradientTape(persistent=True, watch_accessed_variables=False) as g: 115 | g.watch(dummy_q) 116 | dummy_xs = self.forward_mapping(dummy_q, **features) 117 | with tf.GradientTape(watch_accessed_variables=False) as gg: 118 | gg.watch(q) 119 | xs = self.forward_mapping(q, **features) 120 | y = ipm(dummy_xs, xs, mtr_leafs) 121 | grd = gradient(gg, y, q) 122 | 123 | # right hand side 124 | if self.rmp_type == 'canonical': 125 | z = ipm(dummy_xs, acc_leafs, mtr_leafs) - ipm(dummy_xs, crvs, mtr_leafs) 126 | elif self.rmp_type == 'natural': 127 | z = ip(dummy_xs, fce_leafs) - ipm(dummy_xs, crvs, mtr_leafs) 128 | if not tf.math.reduce_all(tf.math.is_finite(z)): 129 | tf.print('!!!nan or inf in z!!!') 130 | 131 | f = gradient(g, z, dummy_q) 132 | if not tf.math.reduce_all(tf.math.is_finite(f)): 133 | tf.print('!!!nan or inf in root force (before normalization) !!!') 134 | # pullback metrics 135 | M = batch_jacobian(g, grd, dummy_q) 136 | if not tf.math.reduce_all(tf.math.is_finite(f)): 137 | tf.print('!!!nan or inf in root metric (before normalization) !!!') 138 | # solve for generalized acceleration at the root 139 | 140 | if mode == 'tuple': 141 | return M, f 142 | elif mode == 'value': 143 | with timing(' resolve', self.timed): 144 | # scale both the metric and the force for the same amount 145 | M_max = 
tf.reduce_max(tf.abs(M), [1, 2]) * 0.01 146 | M_max = tf.maximum(M_max, tf.ones_like(M_max)) 147 | M = tf.einsum('b, bij->bij', 1. / M_max, M) 148 | f = tf.einsum('b, bi->bi', 1. / M_max, f) 149 | 150 | # add a small offset to the diagonal for numerical stability 151 | M = M + self.offset * tf.eye(M.shape[1], batch_shape=(M.shape[0],), dtype=M.dtype) 152 | 153 | if not tf.math.reduce_all(tf.math.is_finite(M)): 154 | tf.print('!--------nan in root metric--------!') 155 | if not tf.math.reduce_all(tf.math.is_finite(f)): 156 | tf.print('!--------nan in root force--------!') 157 | 158 | qdd = solve(M, f) 159 | 160 | if not tf.math.reduce_all(tf.math.is_finite(qdd)): 161 | tf.print('!--------nan in root acceleration--------!') 162 | return qdd 163 | else: 164 | raise ValueError 165 | 166 | 167 | @tf.function 168 | def solve_direct(self, q, qd, mode='value', **features): 169 | with timing(' forward pass', self.timed): 170 | with tf.GradientTape(watch_accessed_variables=False) as gg: 171 | gg.watch(q) 172 | with tf.GradientTape(persistent=True, watch_accessed_variables=False) as ggg: 173 | ggg.watch(q) 174 | xs = self.forward_mapping(q, **features) 175 | dummy_ones = [tf.ones_like(x) for x in xs] 176 | # compute jacobians 177 | jacs = [batch_jacobian(ggg, x, q) for x in xs] 178 | # del ggg 179 | xds = [tf.linalg.matvec(jac, qd) for jac in jacs] 180 | with tf.GradientTape(watch_accessed_variables=False) as g: 181 | g.watch(dummy_ones) 182 | sum_xd = ip(dummy_ones, xds) 183 | grd = gradient(gg, sum_xd, q) 184 | sum_crv = ip(qd, grd) 185 | crvs = gradient(g, sum_crv, dummy_ones) 186 | 187 | # evaluate the rmps at the leaf nodes 188 | with timing(' rmp evaluation', self.timed): 189 | if self.rmp_type == 'canonical': 190 | mtr_leafs, acc_leafs = self.rmp_evals(xs, xds, **features) 191 | elif self.rmp_type == 'natural': 192 | mtr_leafs, fce_leafs = self.rmp_evals(xs, xds, **features) 193 | else: 194 | raise ValueError 195 | 196 | # compute pullback metric and forces 197 | with 
timing(' pullback', self.timed): 198 | M = sum(tf.einsum('bji, bjk, bkl->bil', jac, mtr_leaf, jac) for (jac, mtr_leaf) in zip(jacs, mtr_leafs)) 199 | 200 | if self.rmp_type == 'canonical': 201 | f = sum( 202 | tf.einsum( 203 | 'bji, bjk, bk->bi', 204 | jac, mtr_leaf, acc_leaf - crv 205 | ) for (jac, mtr_leaf, acc_leaf, crv) in zip(jacs, mtr_leafs, acc_leafs, crvs)) 206 | elif self.rmp_type == 'natural': 207 | f = sum( 208 | tf.einsum( 209 | 'bji, bj->bi', 210 | jac, fce_leaf) for (jac, fce_leaf) in zip(jacs, fce_leafs)) - \ 211 | sum( 212 | tf.einsum( 213 | 'bji, bjk, bk->bi', 214 | jac, mtr_leaf, crv) for (jac, mtr_leaf, crv) in zip(jacs, mtr_leafs, crvs)) 215 | else: 216 | raise ValueError 217 | 218 | if mode == 'tuple': 219 | return M, f 220 | elif mode == 'value': 221 | with timing(' resolve', self.timed): 222 | # scale both the metric and the force for the same amount 223 | M_max = tf.reduce_max(tf.abs(M), [1, 2]) * 0.01 224 | M = tf.einsum('b, bij->bij', 1. / M_max, M) 225 | f = tf.einsum('b, bi->bi', 1. 
/ M_max, f) 226 | 227 | # add a small offset to the diagonal for numerical stability 228 | M = M + self.offset * tf.eye(M.shape[1], batch_shape=(M.shape[0],), dtype=M.dtype) 229 | 230 | if not tf.math.reduce_all(tf.math.is_finite(M)): 231 | tf.print('!--------nan in root metric--------!') 232 | if not tf.math.reduce_all(tf.math.is_finite(f)): 233 | tf.print('!--------nan in root force--------!') 234 | 235 | qdd = solve(M, f) 236 | 237 | if not tf.math.reduce_all(tf.math.is_finite(qdd)): 238 | tf.print('!--------nan in root acceleration--------!') 239 | return qdd 240 | else: 241 | raise ValueError 242 | 243 | def __call__(self, q, qd, **features): 244 | return self.solve(q, qd, **features) 245 | -------------------------------------------------------------------------------- /rmp2/rmpgraph/rmps/__init__.py: -------------------------------------------------------------------------------- 1 | from rmp2.rmpgraph.rmps.rmps import * -------------------------------------------------------------------------------- /rmp2/rmpgraph/robotics.py: -------------------------------------------------------------------------------- 1 | """ 2 | pre-specified rmp2 graph class for robots 3 | """ 4 | 5 | from rmp2.rmpgraph.rmpgraph import RMPGraph 6 | from rmp2.kinematics import Robot 7 | from rmp2.rmpgraph.rmps import get_rmp, ExternalCanonicalRMP 8 | from rmp2.rmpgraph.taskmaps import dist2balls, distmap 9 | from rmp2.utils.robot_config_utils import load_robot_config, get_robot_urdf_path 10 | import tensorflow as tf 11 | 12 | ROBOT_RMPS = [ 13 | 'cspace_target_rmp', 14 | 'joint_limit_rmp', 15 | 'joint_velocity_cap_rmp', 16 | 'target_rmp', 17 | 'collision_rmp', 18 | 'damping_rmp', 19 | 'external_rmps' 20 | ] 21 | 22 | 23 | def get_control_points(link_positions, arm_collision_controllers): 24 | control_points = [] 25 | for controller in arm_collision_controllers: 26 | end1, end2 = controller['segment'] 27 | interpolation_points = controller['interpolation_pts'] 28 | for k in 
def get_control_points(link_positions, arm_collision_controllers):
    """Interpolate collision-control points along robot arm segments.

    :param link_positions: dict-like mapping of link name -> batched position
    :param arm_collision_controllers: list of dicts with keys 'segment'
        (pair of link names) and 'interpolation_pts' (number of points)
    :return: stacked control points, one axis-1 entry per interpolated point
    """
    control_points = []
    for controller in arm_collision_controllers:
        end1, end2 = controller['segment']
        interpolation_points = controller['interpolation_pts']
        for k in range(interpolation_points):
            # points at fractions 1/n, 2/n, ..., 1 along the segment
            # (end1 itself is excluded; the previous segment covers it)
            alpha = 1. * (k + 1) / interpolation_points
            point = alpha * link_positions[end2] + (1 - alpha) * link_positions[end1]
            control_points.append(point)
    control_points = tf.stack(control_points, axis=1)
    return control_points



class RobotRMPGraph(RMPGraph):
    """
    pre-specified rmp2 graph class for robots
    """
    def __init__(self, robot_name=None, config_path=None, config=None, workspace_dim=3, dtype=tf.float32, rmp_type='canonical', timed=False, offset=1e-3, name='robot'):
        """
        :param robot_name: name of a robot with a bundled config
        :param config_path: path to a yaml config (exclusive with config)
        :param config: pre-loaded config dict (exclusive with config_path)
        :param workspace_dim: task-space dimension (2 or 3)
        :param offset: diagonal regularizer forwarded to RMPGraph
        """
        assert(robot_name is not None or config_path is not None or config is not None)
        assert(config_path is None or config is None)
        # set up robot
        if config is None:
            robot_name, config = load_robot_config(robot_name=robot_name, config_path=config_path)
        elif robot_name is not None:
            assert robot_name == config['robot_name']
        else:
            robot_name = config['robot_name']

        urdf_path = get_robot_urdf_path(robot_name)

        self.robot = Robot(urdf_path, workspace_dim=workspace_dim, dtype=dtype)
        self.eef_link = config['eef_link']
        self.workspace_dim = workspace_dim
        self.cspace_dim = self.robot.cspace_dim
        self.dtype = dtype

        # shrink the usable joint range by a safety buffer on both sides
        self.joint_lower_limits = self.robot.joint_lower_limits + config['joint_limit_buffers']
        self.joint_upper_limits = self.robot.joint_upper_limits - config['joint_limit_buffers']
        self.joint_velocity_limits = self.robot.joint_velocity_limits

        self.default_config = tf.constant(config['default_q'], dtype=self.dtype)

        self.rmp_config = config['rmp_params']

        # set up RMPs: one leaf RMP per configured key, in ROBOT_RMPS order
        rmps = []
        self.keys = []

        for key in ROBOT_RMPS:
            if key in self.rmp_config:
                # BUG FIX: was `key is not 'external_rmps'` — identity
                # comparison with a str literal (SyntaxWarning in py3.8+,
                # only correct by interning accident); use inequality
                if key != 'external_rmps':
                    rmps.append(get_rmp(key, self.rmp_config[key], dtype=dtype))
                    self.keys.append(key)
                else:
                    # external (e.g. learned) RMPs: one leaf per entry
                    for external_rmp in self.rmp_config[key]:
                        rmps.append(ExternalCanonicalRMP(**external_rmp, dtype=dtype))
                        self.keys.append('external_rmp')

        # control points setup
        self.arm_collision_controllers = config['arm_collision_controllers']

        arm_collision_radii = []
        for arm_controller in self.arm_collision_controllers:
            arm_collision_radii += [arm_controller['radius']] * arm_controller['interpolation_pts']

        self.arm_collision_radii = tf.expand_dims(tf.constant(arm_collision_radii, dtype=dtype), 0)

        # handle collision avoidance with the base of the robot
        body_obstacles = dict()
        body_obstacle_box_mins = []
        body_obstacle_box_maxs = []
        body_obstacle_ball_centers = []
        body_obstacle_ball_radii = []
        for body_obstacle in config['body_obstacles']:
            if body_obstacle['type'] == 'box':
                body_obstacle_box_mins.append(body_obstacle['mins'])
                body_obstacle_box_maxs.append(body_obstacle['maxs'])
            elif body_obstacle['type'] == 'ball':
                body_obstacle_ball_centers.append(body_obstacle['center'])
                body_obstacle_ball_radii.append(body_obstacle['radius'])

        if len(body_obstacle_box_mins):
            body_obstacle_box_mins = tf.convert_to_tensor([body_obstacle_box_mins], dtype=dtype)
            body_obstacle_box_maxs = tf.convert_to_tensor([body_obstacle_box_maxs], dtype=dtype)

            body_obstacles['box'] = {
                'obstacle_mins': body_obstacle_box_mins,
                'obstacle_maxs': body_obstacle_box_maxs
            }

        if len(body_obstacle_ball_centers):
            body_obstacle_ball_centers = tf.convert_to_tensor([body_obstacle_ball_centers], dtype=dtype)
            body_obstacle_ball_radii = tf.convert_to_tensor([body_obstacle_ball_radii], dtype=dtype)

            body_obstacles['ball'] = {
                'obstacle_centers': body_obstacle_ball_centers,
                'obstacle_radii': body_obstacle_ball_radii
            }

        self.body_obstacles = body_obstacles

        super().__init__(rmps, rmp_type=rmp_type, timed=timed, dtype=dtype, name=name)


    def forward_mapping(self, q, **features):
        """
        forward mapping from root node to leaf nodes given environment features
        --------------------------------------------
        :param q: root node coordinate
        :param features: environment features, lists/dicts, e.g. goals, obstacles, etc.
        :return xs: list of leaf node coordinates
        """

        batch_size, input_dim = q.shape
        x = []

        # pad with zeros if the input covers only a prefix of the joints
        if input_dim < self.cspace_dim:
            q = tf.concat([q, tf.zeros((batch_size, self.cspace_dim - input_dim), dtype=self.dtype)], axis=1)

        # forward kinematics
        link_positions = self.robot.forward_kinematics(q)

        # ----------------------------
        # task spaces (one per key, in the same order as self.rmps)
        # ----------------------------

        for i, key in enumerate(self.keys):
            # ----------------------------
            # default configuration
            if key == 'cspace_target_rmp':
                x.append(q - self.default_config)

            # ----------------------------
            # joint limits: signed distances to upper and lower limits
            elif key == 'joint_limit_rmp':
                x_upper = self.joint_upper_limits - q
                x_lower = q - self.joint_lower_limits
                x_joint_limit = tf.reshape(tf.concat([x_upper, x_lower], axis=1), (-1, 1))
                x.append(x_joint_limit)

            # ----------------------------
            # joint velocity limits
            elif key == 'joint_velocity_cap_rmp':
                x_velocity_cap = tf.reshape(q, (-1, 1))
                x.append(x_velocity_cap)

            # ----------------------------
            # goal attractor: end-effector position
            elif key == 'target_rmp':
                eef_pos = link_positions[self.eef_link]
                x.append(eef_pos * 1.)

            # ----------------------------
            # collision avoidance: distances from control points to
            # workspace obstacles (from features) and body obstacles
            elif key == 'collision_rmp':
                control_points = get_control_points(link_positions, self.arm_collision_controllers)

                # obstacles arrive flattened as (center..., radius) tuples
                obstacles = features['obstacles']
                obstacles = tf.reshape(obstacles, (batch_size, -1, self.workspace_dim + 1))
                obstacle_centers = obstacles[:, :, :self.workspace_dim]
                obstacle_radii = obstacles[:, :, -1]

                obstacle_dists = dist2balls(
                    control_points,
                    self.arm_collision_radii,
                    obstacle_centers, obstacle_radii)

                for body_obstacle_type, body_obstacle in self.body_obstacles.items():
                    body_obstacle_dists = distmap(
                        body_obstacle_type,
                        control_points,
                        self.arm_collision_radii,
                        **body_obstacle
                    )
                    obstacle_dists = tf.concat([obstacle_dists, body_obstacle_dists], axis=0)

                x.append(obstacle_dists)

            # ----------------------------
            # joint damping
            elif key == 'damping_rmp':
                x.append(q * 1.)

            # ----------------------------
            # neural network rmp: operates either on joint space or on a
            # specific link's position
            elif key == 'external_rmp':
                link = self.rmps[i].link
                if link == 'joint':
                    x.append(q * 1.)
                else:
                    x.append(link_positions[link] * 1.)

            else:
                raise ValueError('unknown rmp key: %r' % key)

        return x
def distmap(type, control_points, control_point_radii, **params):
    """Dispatch to the distance function for the given obstacle type.

    :param type: 'ball' or 'box' (name kept for caller compatibility even
        though it shadows the builtin)
    :param control_points: batch_size x num_control_points x dimension
    :param control_point_radii: 1 x num_control_points
    :param params: forwarded to the type-specific distance function
    :raises ValueError: for an unknown obstacle type
    """
    if type == 'ball':
        return dist2balls(control_points, control_point_radii, **params)
    elif type == 'box':
        return dist2boxes(control_points, control_point_radii, **params)
    else:
        raise ValueError('unknown obstacle type: %r' % type)

def dist2balls(control_points, control_point_radii, obstacle_centers, obstacle_radii):
    """
    compute the distance between control point balls and obstacle balls
    :param control_points: batch_size x num_control_points x dimension, control point positions
    :param control_point_radii: 1 x num_control_points, control point ball radii
    :param obstacle_centers: batch_size x num_obstacles x dimension, obstacle center positions
    :param obstacle_radii: batch_size x num_obstacles, obstacle radii
    :return obstacle_dists: (batch_size * num_control_points * num_obstacles) x 1,
        surface-to-surface distances flattened into a column vector
        (negative means the balls overlap)
    """
    center_dists = pdist2(control_points, obstacle_centers)
    obstacle_radii = tf.expand_dims(obstacle_radii, 1)
    arm_radii = tf.expand_dims(control_point_radii, -1)
    collision_radii = arm_radii + obstacle_radii

    # surface distance = center distance minus the two radii
    obstacle_dists = center_dists - collision_radii
    obstacle_dists = tf.reshape(obstacle_dists, (-1, 1))

    return obstacle_dists

def dist2boxes(control_points, control_point_radii, obstacle_mins, obstacle_maxs):
    """
    compute the distance between control point balls and obstacle boxes
    :param control_points: batch_size x num_control_points x dimension, control point positions
    :param control_point_radii: 1 x num_control_points, control point ball radii
    :param obstacle_mins: batch_size x num_obstacles x dimension, lower corners of obstacle boxes
    :param obstacle_maxs: batch_size x num_obstacles x dimension, upper corner of obstacle boxes
    :return obstacle_dists: (batch_size * num_control_points * num_obstacles) x 1,
        surface distances flattened into a column vector
        (negative means the ball overlaps the box)
    """
    control_points = tf.expand_dims(control_points, 2)
    obstacle_mins = tf.expand_dims(obstacle_mins, 1)
    obstacle_maxs = tf.expand_dims(obstacle_maxs, 1)
    # closest point inside the box: clamp the control point to the box
    control_points_proj = tf.minimum(
        tf.maximum(control_points, obstacle_mins),
        obstacle_maxs)
    obstacle_dists = tf.norm(control_points - control_points_proj, axis=-1)
    # subtract the control-point ball radius to get a surface distance
    obstacle_dists = obstacle_dists - tf.expand_dims(tf.expand_dims(control_point_radii, -1), -1)
    obstacle_dists = tf.reshape(obstacle_dists, (-1, 1))

    return obstacle_dists
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /rmp2/urdf/franka_panda/panda.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 
| 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | -------------------------------------------------------------------------------- /rmp2/urdf/meshes/collision/finger.obj: -------------------------------------------------------------------------------- 1 | # File produced by Open Asset Import Library (http://www.assimp.sf.net) 2 | # (assimp v3.0.1262) 3 | 4 | mtllib finger.stl.obj.mtl 5 | 6 | # 96 vertex positions 7 | v 0.01036 0.0264034 0.000154629 8 | v 0.0104486 0.0025833 0.000146801 9 | v -0.0103872 0.00253418 0.000131696 10 | v -0.0103872 0.00253418 0.000131696 11 | v -0.0104795 0.0161016 0.0189882 12 | v -0.0104013 0.0263094 0.00016651 13 | v -0.0104013 0.0263094 0.00016651 14 | v -0.0104795 0.0161016 0.0189882 15 | v -0.0103889 0.0252203 0.0191876 16 | v 0.0104486 0.0025833 0.000146801 17 | v -0.0087304 -2.35252e-05 0.0361648 18 | v -0.0103872 0.00253418 0.000131696 19 | v 0.0104005 0.0252534 0.0190366 20 | v -0.0103889 0.0252203 0.0191876 21 | v 0.00583983 0.0142743 0.0538035 22 | v -0.0103872 0.00253418 0.000131696 23 | v -0.0104013 0.0263094 0.00016651 24 | v 0.01036 0.0264034 0.000154629 25 | v 0.00861608 0.0139887 0.0513279 26 | v 0.0104948 0.0151026 0.0184356 27 | v 0.0104005 0.0252534 0.0190366 28 | v 0.0104948 0.0151026 0.0184356 29 | v 0.00869277 -0.000132643 0.0501662 30 | v 0.0104486 0.0025833 0.000146801 31 | v 0.00861608 0.0139887 0.0513279 32 | v 0.00869277 -0.000132643 0.0501662 33 | v 0.0104948 0.0151026 0.0184356 34 | v -0.00862294 -5.68019e-05 0.0509528 35 | v -0.0103872 0.00253418 0.000131696 36 | v -0.0087304 -2.35252e-05 0.0361648 37 | v 0.0104486 0.0025833 0.000146801 38 | v 0.00869277 -0.000132643 0.0501662 39 | v -0.0087304 -2.35252e-05 0.0361648 40 | v 0.00869277 -0.000132643 0.0501662 41 | v -0.00548142 -9.11208e-05 0.0537247 42 | v -0.0087304 -2.35252e-05 0.0361648 43 | v 0.0104005 0.0252534 0.0190366 44 | v 0.0104948 
0.0151026 0.0184356 45 | v 0.0104486 0.0025833 0.000146801 46 | v 0.0104005 0.0252534 0.0190366 47 | v 0.0104486 0.0025833 0.000146801 48 | v 0.01036 0.0264034 0.000154629 49 | v 0.0104005 0.0252534 0.0190366 50 | v 0.01036 0.0264034 0.000154629 51 | v -0.0103889 0.0252203 0.0191876 52 | v -0.0103889 0.0252203 0.0191876 53 | v -0.00527792 0.0142931 0.053849 54 | v 0.00583983 0.0142743 0.0538035 55 | v -0.00777838 0.0142182 0.0523659 56 | v -0.00527792 0.0142931 0.053849 57 | v -0.0103889 0.0252203 0.0191876 58 | v -0.00862294 -5.68019e-05 0.0509528 59 | v -0.00548142 -9.11208e-05 0.0537247 60 | v -0.00777838 0.0142182 0.0523659 61 | v -0.0103872 0.00253418 0.000131696 62 | v -0.00862294 -5.68019e-05 0.0509528 63 | v -0.0104795 0.0161016 0.0189882 64 | v 0.0104005 0.0252534 0.0190366 65 | v 0.00583983 0.0142743 0.0538035 66 | v 0.00861608 0.0139887 0.0513279 67 | v 0.01036 0.0264034 0.000154629 68 | v -0.0104013 0.0263094 0.00016651 69 | v -0.0103889 0.0252203 0.0191876 70 | v -0.0104795 0.0161016 0.0189882 71 | v -0.00884117 0.0139176 0.0505894 72 | v -0.0103889 0.0252203 0.0191876 73 | v -0.00884117 0.0139176 0.0505894 74 | v -0.00777838 0.0142182 0.0523659 75 | v -0.0103889 0.0252203 0.0191876 76 | v 0.00583983 0.0142743 0.0538035 77 | v -0.00548142 -9.11208e-05 0.0537247 78 | v 0.00613802 -2.06026e-05 0.0535776 79 | v -0.00527792 0.0142931 0.053849 80 | v -0.00548142 -9.11208e-05 0.0537247 81 | v 0.00583983 0.0142743 0.0538035 82 | v 0.00869277 -0.000132643 0.0501662 83 | v 0.00613802 -2.06026e-05 0.0535776 84 | v -0.00548142 -9.11208e-05 0.0537247 85 | v 0.00613802 -2.06026e-05 0.0535776 86 | v 0.00869277 -0.000132643 0.0501662 87 | v 0.00861608 0.0139887 0.0513279 88 | v -0.00548142 -9.11208e-05 0.0537247 89 | v -0.00862294 -5.68019e-05 0.0509528 90 | v -0.0087304 -2.35252e-05 0.0361648 91 | v -0.00777838 0.0142182 0.0523659 92 | v -0.00548142 -9.11208e-05 0.0537247 93 | v -0.00527792 0.0142931 0.053849 94 | v -0.00862294 -5.68019e-05 0.0509528 95 | v 
-0.00884117 0.0139176 0.0505894 96 | v -0.0104795 0.0161016 0.0189882 97 | v 0.00613802 -2.06026e-05 0.0535776 98 | v 0.00861608 0.0139887 0.0513279 99 | v 0.00583983 0.0142743 0.0538035 100 | v -0.00777838 0.0142182 0.0523659 101 | v -0.00884117 0.0139176 0.0505894 102 | v -0.00862294 -5.68019e-05 0.0509528 103 | 104 | # 0 UV coordinates 105 | 106 | # 96 vertex normals 107 | vn 0.000724164 0.000331308 -1 108 | vn 0.000724164 0.000331308 -1 109 | vn 0.000724164 0.000331308 -1 110 | vn -0.99999 -0.000585782 -0.00447174 111 | vn -0.99999 -0.000585782 -0.00447174 112 | vn -0.99999 -0.000585782 -0.00447174 113 | vn -0.99995 0.00990875 0.00122006 114 | vn -0.99995 0.00990875 0.00122006 115 | vn -0.99995 0.00990875 0.00122006 116 | vn 0.00240304 -0.997479 -0.0709137 117 | vn 0.00240304 -0.997479 -0.0709137 118 | vn 0.00240304 -0.997479 -0.0709137 119 | vn 0.000668274 0.953556 0.301214 120 | vn 0.000668274 0.953556 0.301214 121 | vn 0.000668274 0.953556 0.301214 122 | vn -0.0005789 0.00146393 -0.999999 123 | vn -0.0005789 0.00146393 -0.999999 124 | vn -0.0005789 0.00146393 -0.999999 125 | vn 0.998344 0.00589157 0.0572229 126 | vn 0.998344 0.00589157 0.0572229 127 | vn 0.998344 0.00589157 0.0572229 128 | vn 0.998185 -0.050838 0.0322792 129 | vn 0.998185 -0.050838 0.0322792 130 | vn 0.998185 -0.050838 0.0322792 131 | vn 0.998371 0.000729252 0.0570496 132 | vn 0.998371 0.000729252 0.0570496 133 | vn 0.998371 0.000729252 0.0570496 134 | vn -0.871287 -0.490747 0.00522707 135 | vn -0.871287 -0.490747 0.00522707 136 | vn -0.871287 -0.490747 0.00522707 137 | vn 0.0362712 -0.99794 -0.0529128 138 | vn 0.0362712 -0.99794 -0.0529128 139 | vn 0.0362712 -0.99794 -0.0529128 140 | vn -0.00372285 -0.999988 -0.00316058 141 | vn -0.00372285 -0.999988 -0.00316058 142 | vn -0.00372285 -0.999988 -0.00316058 143 | vn 0.999909 0.00984192 -0.00926313 144 | vn 0.999909 0.00984192 -0.00926313 145 | vn 0.999909 0.00984192 -0.00926313 146 | vn 0.999991 0.00372285 -0.00191858 147 | vn 0.999991 
0.00372285 -0.00191858 148 | vn 0.999991 0.00372285 -0.00191858 149 | vn -0.0011495 0.99815 0.0607926 150 | vn -0.0011495 0.99815 0.0607926 151 | vn -0.0011495 0.99815 0.0607926 152 | vn 0.00284562 0.953846 0.300284 153 | vn 0.00284562 0.953846 0.300284 154 | vn 0.00284562 0.953846 0.300284 155 | vn -0.218924 0.920873 0.32259 156 | vn -0.218924 0.920873 0.32259 157 | vn -0.218924 0.920873 0.32259 158 | vn -0.661425 -0.0350314 0.749193 159 | vn -0.661425 -0.0350314 0.749193 160 | vn -0.661425 -0.0350314 0.749193 161 | vn -0.998169 -0.0513123 0.0320353 162 | vn -0.998169 -0.0513123 0.0320353 163 | vn -0.998169 -0.0513123 0.0320353 164 | vn 0.377717 0.867563 0.323518 165 | vn 0.377717 0.867563 0.323518 166 | vn 0.377717 0.867563 0.323518 167 | vn -0.00448618 0.998355 0.0571685 168 | vn -0.00448618 0.998355 0.0571685 169 | vn -0.00448618 0.998355 0.0571685 170 | vn -0.998589 0.0087761 0.0523757 171 | vn -0.998589 0.0087761 0.0523757 172 | vn -0.998589 0.0087761 0.0523757 173 | vn -0.665951 0.690851 0.281485 174 | vn -0.665951 0.690851 0.281485 175 | vn -0.665951 0.690851 0.281485 176 | vn 0.0127505 -0.0155288 0.999798 177 | vn 0.0127505 -0.0155288 0.999798 178 | vn 0.0127505 -0.0155288 0.999798 179 | vn 0.00408502 -0.00870048 0.999954 180 | vn 0.00408502 -0.00870048 0.999954 181 | vn 0.00408502 -0.00870048 0.999954 182 | vn 0.006542 -0.999267 0.0377181 183 | vn 0.006542 -0.999267 0.0377181 184 | vn 0.006542 -0.999267 0.0377181 185 | vn 0.798906 -0.0450007 0.59977 186 | vn 0.798906 -0.0450007 0.59977 187 | vn 0.798906 -0.0450007 0.59977 188 | vn -0.00899609 -0.999957 -0.00218479 189 | vn -0.00899609 -0.999957 -0.00218479 190 | vn -0.00899609 -0.999957 -0.00218479 191 | vn -0.510144 -0.000216662 0.860089 192 | vn -0.510144 -0.000216662 0.860089 193 | vn -0.510144 -0.000216662 0.860089 194 | vn -0.998608 -0.0142745 0.0507836 195 | vn -0.998608 -0.0142745 0.0507836 196 | vn -0.998608 -0.0142745 0.0507836 197 | vn 0.665648 0.00209596 0.746263 198 | vn 0.665648 0.00209596 
0.746263 199 | vn 0.665648 0.00209596 0.746263 200 | vn -0.858159 -4.90786e-05 0.513384 201 | vn -0.858159 -4.90786e-05 0.513384 202 | vn -0.858159 -4.90786e-05 0.513384 203 | 204 | # Mesh '' with 32 faces 205 | g 206 | usemtl DefaultMaterial 207 | f 1//1 2//2 3//3 208 | f 4//4 5//5 6//6 209 | f 7//7 8//8 9//9 210 | f 10//10 11//11 12//12 211 | f 13//13 14//14 15//15 212 | f 16//16 17//17 18//18 213 | f 19//19 20//20 21//21 214 | f 22//22 23//23 24//24 215 | f 25//25 26//26 27//27 216 | f 28//28 29//29 30//30 217 | f 31//31 32//32 33//33 218 | f 34//34 35//35 36//36 219 | f 37//37 38//38 39//39 220 | f 40//40 41//41 42//42 221 | f 43//43 44//44 45//45 222 | f 46//46 47//47 48//48 223 | f 49//49 50//50 51//51 224 | f 52//52 53//53 54//54 225 | f 55//55 56//56 57//57 226 | f 58//58 59//59 60//60 227 | f 61//61 62//62 63//63 228 | f 64//64 65//65 66//66 229 | f 67//67 68//68 69//69 230 | f 70//70 71//71 72//72 231 | f 73//73 74//74 75//75 232 | f 76//76 77//77 78//78 233 | f 79//79 80//80 81//81 234 | f 82//82 83//83 84//84 235 | f 85//85 86//86 87//87 236 | f 88//88 89//89 90//90 237 | f 91//91 92//92 93//93 238 | f 94//94 95//95 96//96 239 | 240 | -------------------------------------------------------------------------------- /rmp2/urdf/meshes/collision/link6.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'link2.blend' 2 | # Material Count: 17 3 | 4 | newmtl Face064_002_001_002_001.001 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 1.000000 0.000000 0.000000 8 | Ks 0.003906 0.003906 0.003906 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl Face065_002_001_002_001.001 15 | Ns -1.960784 16 | Ka 1.000000 1.000000 1.000000 17 | Kd 0.000000 1.000000 0.000000 18 | Ks 0.003906 0.003906 0.003906 19 | Ke 0.000000 0.000000 0.000000 20 | Ni 1.000000 21 | d 1.000000 22 | illum 2 23 | 24 | newmtl Face374_002_001_002_001.001 25 | Ns -1.960784 26 | Ka 1.000000 
1.000000 1.000000 27 | Kd 1.000000 1.000000 1.000000 28 | Ks 0.003906 0.003906 0.003906 29 | Ke 0.000000 0.000000 0.000000 30 | Ni 1.000000 31 | d 1.000000 32 | illum 2 33 | 34 | newmtl Face539_002_001_002_001.001 35 | Ns -1.960784 36 | Ka 1.000000 1.000000 1.000000 37 | Kd 0.250980 0.250980 0.250980 38 | Ks 0.003906 0.003906 0.003906 39 | Ke 0.000000 0.000000 0.000000 40 | Ni 1.000000 41 | d 1.000000 42 | illum 2 43 | map_Kd D:\dev\pybullet_robots\data\franka_panda\meshes\visual\colors.png 44 | 45 | newmtl Part__Feature001_009_001_002_001.001 46 | Ns -1.960784 47 | Ka 1.000000 1.000000 1.000000 48 | Kd 0.250980 0.250980 0.250980 49 | Ks 0.003906 0.003906 0.003906 50 | Ke 0.000000 0.000000 0.000000 51 | Ni 1.000000 52 | d 1.000000 53 | illum 2 54 | map_Kd D:\dev\pybullet_robots\data\franka_panda\meshes\visual\colors.png 55 | 56 | newmtl Part__Feature002_006_001_002_001.001 57 | Ns -1.960784 58 | Ka 1.000000 1.000000 1.000000 59 | Kd 0.250980 0.250980 0.250980 60 | Ks 0.003906 0.003906 0.003906 61 | Ke 0.000000 0.000000 0.000000 62 | Ni 1.000000 63 | d 1.000000 64 | illum 2 65 | map_Kd D:\dev\pybullet_robots\data\franka_panda\meshes\visual\colors.png 66 | 67 | newmtl Shell002_002_001_002_001.001 68 | Ns -1.960784 69 | Ka 1.000000 1.000000 1.000000 70 | Kd 1.000000 1.000000 1.000000 71 | Ks 0.003906 0.003906 0.003906 72 | Ke 0.000000 0.000000 0.000000 73 | Ni 1.000000 74 | d 1.000000 75 | illum 2 76 | 77 | newmtl Shell003_002_001_002_001.001 78 | Ns -1.960784 79 | Ka 1.000000 1.000000 1.000000 80 | Kd 1.000000 1.000000 1.000000 81 | Ks 0.003906 0.003906 0.003906 82 | Ke 0.000000 0.000000 0.000000 83 | Ni 1.000000 84 | d 1.000000 85 | illum 2 86 | 87 | newmtl Shell004_001_001_002_001.001 88 | Ns -1.960784 89 | Ka 1.000000 1.000000 1.000000 90 | Kd 1.000000 1.000000 1.000000 91 | Ks 0.003906 0.003906 0.003906 92 | Ke 0.000000 0.000000 0.000000 93 | Ni 1.000000 94 | d 1.000000 95 | illum 2 96 | 97 | newmtl Shell005_001_001_002_001.001 98 | Ns -1.960784 99 | Ka 1.000000 
1.000000 1.000000 100 | Kd 1.000000 1.000000 1.000000 101 | Ks 0.003906 0.003906 0.003906 102 | Ke 0.000000 0.000000 0.000000 103 | Ni 1.000000 104 | d 1.000000 105 | illum 2 106 | 107 | newmtl Shell006_003_002_001.001 108 | Ns -1.960784 109 | Ka 1.000000 1.000000 1.000000 110 | Kd 0.901961 0.921569 0.929412 111 | Ks 0.015625 0.015625 0.015625 112 | Ke 0.000000 0.000000 0.000000 113 | Ni 1.000000 114 | d 1.000000 115 | illum 2 116 | 117 | newmtl Shell007_002_002_001.001 118 | Ns -1.960784 119 | Ka 1.000000 1.000000 1.000000 120 | Kd 0.250000 0.250000 0.250000 121 | Ks 0.003906 0.003906 0.003906 122 | Ke 0.000000 0.000000 0.000000 123 | Ni 1.000000 124 | d 1.000000 125 | illum 2 126 | 127 | newmtl Shell011_002_002_001.001 128 | Ns -1.960784 129 | Ka 1.000000 1.000000 1.000000 130 | Kd 1.000000 1.000000 1.000000 131 | Ks 0.003906 0.003906 0.003906 132 | Ke 0.000000 0.000000 0.000000 133 | Ni 1.000000 134 | d 1.000000 135 | illum 2 136 | 137 | newmtl Shell012_002_002_001.001 138 | Ns -1.960784 139 | Ka 1.000000 1.000000 1.000000 140 | Kd 1.000000 1.000000 1.000000 141 | Ks 0.003906 0.003906 0.003906 142 | Ke 0.000000 0.000000 0.000000 143 | Ni 1.000000 144 | d 1.000000 145 | illum 2 146 | map_Kd D:\dev\pybullet_robots\data\franka_panda\meshes\visual\colors.png 147 | 148 | newmtl Shell_003_001_002_001.001 149 | Ns -1.960784 150 | Ka 1.000000 1.000000 1.000000 151 | Kd 0.250980 0.250980 0.250980 152 | Ks 0.003906 0.003906 0.003906 153 | Ke 0.000000 0.000000 0.000000 154 | Ni 1.000000 155 | d 1.000000 156 | illum 2 157 | map_Kd D:\dev\pybullet_robots\data\franka_panda\meshes\visual\colors.png 158 | 159 | newmtl Union001_001_001_002_001.001 160 | Ns -1.960784 161 | Ka 1.000000 1.000000 1.000000 162 | Kd 0.039216 0.541176 0.780392 163 | Ks 0.003906 0.003906 0.003906 164 | Ke 0.000000 0.000000 0.000000 165 | Ni 1.000000 166 | d 1.000000 167 | illum 2 168 | 169 | newmtl Union_001_001_002_001.001 170 | Ns -1.960784 171 | Ka 1.000000 1.000000 1.000000 172 | Kd 0.039216 
0.541176 0.780392 173 | Ks 0.003906 0.003906 0.003906 174 | Ke 0.000000 0.000000 0.000000 175 | Ni 1.000000 176 | d 1.000000 177 | illum 2 178 | -------------------------------------------------------------------------------- /rmp2/urdf/meshes/visual/colors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UWRobotLearning/rmp2/c612a014f517204b38c552619a441be4b3d7b67f/rmp2/urdf/meshes/visual/colors.png -------------------------------------------------------------------------------- /rmp2/urdf/meshes/visual/finger.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'link2.blend' 2 | # Material Count: 2 3 | 4 | newmtl Part__Feature001_006.001 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.901961 0.921569 0.929412 8 | Ks 0.250000 0.250000 0.250000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | map_Kd colors.png 14 | 15 | newmtl Part__Feature_007.001 16 | Ns -1.960784 17 | Ka 1.000000 1.000000 1.000000 18 | Kd 0.250980 0.250980 0.250980 19 | Ks 0.250000 0.250000 0.250000 20 | Ke 0.000000 0.000000 0.000000 21 | Ni 1.000000 22 | d 1.000000 23 | illum 2 24 | map_Kd colors.png 25 | -------------------------------------------------------------------------------- /rmp2/urdf/meshes/visual/hand.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'link2.blend' 2 | # Material Count: 5 3 | 4 | newmtl Part__Feature001_008_005.001 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.250980 0.250980 0.250980 8 | Ks 0.007812 0.007812 0.007812 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | map_Kd colors.png 14 | 15 | newmtl Part__Feature002_005_005.001 16 | Ns -1.960784 17 | Ka 1.000000 1.000000 1.000000 18 | Kd 0.901961 0.921569 0.929412 19 | Ks 0.015625 0.015625 0.015625 20 | Ke 0.000000 0.000000 0.000000 
21 | Ni 1.000000 22 | d 1.000000 23 | illum 2 24 | 25 | newmtl Part__Feature005_001_005.001 26 | Ns -1.960784 27 | Ka 1.000000 1.000000 1.000000 28 | Kd 1.000000 1.000000 1.000000 29 | Ks 0.015625 0.015625 0.015625 30 | Ke 0.000000 0.000000 0.000000 31 | Ni 1.000000 32 | d 1.000000 33 | illum 2 34 | map_Kd colors.png 35 | 36 | newmtl Part__Feature005_001_005_001.001 37 | Ns -1.960784 38 | Ka 1.000000 1.000000 1.000000 39 | Kd 0.901961 0.921569 0.929412 40 | Ks 0.015625 0.015625 0.015625 41 | Ke 0.000000 0.000000 0.000000 42 | Ni 1.000000 43 | d 1.000000 44 | illum 2 45 | map_Kd colors.png 46 | 47 | newmtl Part__Feature_009_005.001 48 | Ns -1.960784 49 | Ka 1.000000 1.000000 1.000000 50 | Kd 0.250980 0.250980 0.250980 51 | Ks 0.015625 0.015625 0.015625 52 | Ke 0.000000 0.000000 0.000000 53 | Ni 1.000000 54 | d 1.000000 55 | illum 2 56 | map_Kd colors.png 57 | -------------------------------------------------------------------------------- /rmp2/urdf/meshes/visual/link1.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'link2.blend' 2 | # Material Count: 1 3 | 4 | newmtl Part__Feature_001 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 1.000000 1.000000 1.000000 8 | Ks 0.062500 0.062500 0.062500 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | map_Kd colors.png 14 | -------------------------------------------------------------------------------- /rmp2/urdf/meshes/visual/link2.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'link3.blend' 2 | # Material Count: 1 3 | 4 | newmtl Part__Feature024 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 1.000000 1.000000 1.000000 8 | Ks 0.125000 0.125000 0.125000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | map_Kd colors.png 14 | -------------------------------------------------------------------------------- 
/rmp2/urdf/meshes/visual/link3.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'link4.blend' 2 | # Material Count: 4 3 | 4 | newmtl Part__Feature001_010_001_002 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 1.000000 1.000000 1.000000 8 | Ks 0.007812 0.007812 0.007812 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | map_Kd colors.png 14 | 15 | newmtl Part__Feature002_007_001_002 16 | Ns -1.960784 17 | Ka 1.000000 1.000000 1.000000 18 | Kd 1.000000 1.000000 1.000000 19 | Ks 0.007812 0.007812 0.007812 20 | Ke 0.000000 0.000000 0.000000 21 | Ni 1.000000 22 | d 1.000000 23 | illum 2 24 | map_Kd colors.png 25 | 26 | newmtl Part__Feature003_004_001_002 27 | Ns -1.960784 28 | Ka 1.000000 1.000000 1.000000 29 | Kd 1.000000 1.000000 1.000000 30 | Ks 0.007812 0.007812 0.007812 31 | Ke 0.000000 0.000000 0.000000 32 | Ni 1.000000 33 | d 1.000000 34 | illum 2 35 | map_Kd colors.png 36 | 37 | newmtl Part__Feature_001_001_001_002 38 | Ns -1.960784 39 | Ka 1.000000 1.000000 1.000000 40 | Kd 0.250980 0.250980 0.250980 41 | Ks 0.007812 0.007812 0.007812 42 | Ke 0.000000 0.000000 0.000000 43 | Ni 1.000000 44 | d 1.000000 45 | illum 2 46 | map_Kd colors.png 47 | -------------------------------------------------------------------------------- /rmp2/urdf/meshes/visual/link4.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'link4.blend' 2 | # Material Count: 4 3 | 4 | newmtl Part__Feature001_001_003_001.001 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 1.000000 1.000000 1.000000 8 | Ks 0.007812 0.007812 0.007812 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl Part__Feature002_001_003_001.001 15 | Ns -1.960784 16 | Ka 1.000000 1.000000 1.000000 17 | Kd 0.250980 0.250980 0.250980 18 | Ks 0.007812 0.007812 0.007812 19 | Ke 0.000000 0.000000 0.000000 20 | Ni 
1.000000 21 | d 1.000000 22 | illum 2 23 | map_Kd colors.png 24 | 25 | newmtl Part__Feature003_001_003_001.001 26 | Ns -1.960784 27 | Ka 1.000000 1.000000 1.000000 28 | Kd 1.000000 1.000000 1.000000 29 | Ks 0.007812 0.007812 0.007812 30 | Ke 0.000000 0.000000 0.000000 31 | Ni 1.000000 32 | d 1.000000 33 | illum 2 34 | map_Kd colors.png 35 | 36 | newmtl Part__Feature_002_003_001.001 37 | Ns -1.960784 38 | Ka 1.000000 1.000000 1.000000 39 | Kd 1.000000 1.000000 1.000000 40 | Ks 0.007812 0.007812 0.007812 41 | Ke 0.000000 0.000000 0.000000 42 | Ni 1.000000 43 | d 1.000000 44 | illum 2 45 | map_Kd colors.png 46 | -------------------------------------------------------------------------------- /rmp2/urdf/meshes/visual/link5.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 3 3 | 4 | newmtl Part__Feature_002_004_003.002 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 1.000000 1.000000 1.000000 8 | Ks 0.015625 0.015625 0.015625 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | map_Kd colors.png 14 | 15 | newmtl Shell001_001_001_003.002 16 | Ns -1.960784 17 | Ka 1.000000 1.000000 1.000000 18 | Kd 0.250000 0.250000 0.250000 19 | Ks 0.015625 0.015625 0.015625 20 | Ke 0.000000 0.000000 0.000000 21 | Ni 1.000000 22 | d 1.000000 23 | illum 2 24 | map_Kd colors.png 25 | 26 | newmtl Shell_001_001_003.002 27 | Ns -1.960784 28 | Ka 1.000000 1.000000 1.000000 29 | Kd 1.000000 1.000000 1.000000 30 | Ks 0.015625 0.015625 0.015625 31 | Ke 0.000000 0.000000 0.000000 32 | Ni 1.000000 33 | d 1.000000 34 | illum 2 35 | -------------------------------------------------------------------------------- /rmp2/urdf/meshes/visual/link6.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'link2.blend' 2 | # Material Count: 17 3 | 4 | newmtl Face064_002_001_002_001.001 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 
1.000000 7 | Kd 1.000000 0.000000 0.000000 8 | Ks 0.003906 0.003906 0.003906 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl Face065_002_001_002_001.001 15 | Ns -1.960784 16 | Ka 1.000000 1.000000 1.000000 17 | Kd 0.000000 1.000000 0.000000 18 | Ks 0.003906 0.003906 0.003906 19 | Ke 0.000000 0.000000 0.000000 20 | Ni 1.000000 21 | d 1.000000 22 | illum 2 23 | 24 | newmtl Face374_002_001_002_001.001 25 | Ns -1.960784 26 | Ka 1.000000 1.000000 1.000000 27 | Kd 1.000000 1.000000 1.000000 28 | Ks 0.003906 0.003906 0.003906 29 | Ke 0.000000 0.000000 0.000000 30 | Ni 1.000000 31 | d 1.000000 32 | illum 2 33 | 34 | newmtl Face539_002_001_002_001.001 35 | Ns -1.960784 36 | Ka 1.000000 1.000000 1.000000 37 | Kd 0.250980 0.250980 0.250980 38 | Ks 0.003906 0.003906 0.003906 39 | Ke 0.000000 0.000000 0.000000 40 | Ni 1.000000 41 | d 1.000000 42 | illum 2 43 | map_Kd colors.png 44 | 45 | newmtl Part__Feature001_009_001_002_001.001 46 | Ns -1.960784 47 | Ka 1.000000 1.000000 1.000000 48 | Kd 0.250980 0.250980 0.250980 49 | Ks 0.003906 0.003906 0.003906 50 | Ke 0.000000 0.000000 0.000000 51 | Ni 1.000000 52 | d 1.000000 53 | illum 2 54 | map_Kd colors.png 55 | 56 | newmtl Part__Feature002_006_001_002_001.001 57 | Ns -1.960784 58 | Ka 1.000000 1.000000 1.000000 59 | Kd 0.250980 0.250980 0.250980 60 | Ks 0.003906 0.003906 0.003906 61 | Ke 0.000000 0.000000 0.000000 62 | Ni 1.000000 63 | d 1.000000 64 | illum 2 65 | map_Kd colors.png 66 | 67 | newmtl Shell002_002_001_002_001.001 68 | Ns -1.960784 69 | Ka 1.000000 1.000000 1.000000 70 | Kd 1.000000 1.000000 1.000000 71 | Ks 0.003906 0.003906 0.003906 72 | Ke 0.000000 0.000000 0.000000 73 | Ni 1.000000 74 | d 1.000000 75 | illum 2 76 | 77 | newmtl Shell003_002_001_002_001.001 78 | Ns -1.960784 79 | Ka 1.000000 1.000000 1.000000 80 | Kd 1.000000 1.000000 1.000000 81 | Ks 0.003906 0.003906 0.003906 82 | Ke 0.000000 0.000000 0.000000 83 | Ni 1.000000 84 | d 1.000000 85 | illum 2 86 | 87 | 
newmtl Shell004_001_001_002_001.001 88 | Ns -1.960784 89 | Ka 1.000000 1.000000 1.000000 90 | Kd 1.000000 1.000000 1.000000 91 | Ks 0.003906 0.003906 0.003906 92 | Ke 0.000000 0.000000 0.000000 93 | Ni 1.000000 94 | d 1.000000 95 | illum 2 96 | 97 | newmtl Shell005_001_001_002_001.001 98 | Ns -1.960784 99 | Ka 1.000000 1.000000 1.000000 100 | Kd 1.000000 1.000000 1.000000 101 | Ks 0.003906 0.003906 0.003906 102 | Ke 0.000000 0.000000 0.000000 103 | Ni 1.000000 104 | d 1.000000 105 | illum 2 106 | 107 | newmtl Shell006_003_002_001.001 108 | Ns -1.960784 109 | Ka 1.000000 1.000000 1.000000 110 | Kd 0.901961 0.921569 0.929412 111 | Ks 0.015625 0.015625 0.015625 112 | Ke 0.000000 0.000000 0.000000 113 | Ni 1.000000 114 | d 1.000000 115 | illum 2 116 | 117 | newmtl Shell007_002_002_001.001 118 | Ns -1.960784 119 | Ka 1.000000 1.000000 1.000000 120 | Kd 0.250000 0.250000 0.250000 121 | Ks 0.003906 0.003906 0.003906 122 | Ke 0.000000 0.000000 0.000000 123 | Ni 1.000000 124 | d 1.000000 125 | illum 2 126 | 127 | newmtl Shell011_002_002_001.001 128 | Ns -1.960784 129 | Ka 1.000000 1.000000 1.000000 130 | Kd 1.000000 1.000000 1.000000 131 | Ks 0.003906 0.003906 0.003906 132 | Ke 0.000000 0.000000 0.000000 133 | Ni 1.000000 134 | d 1.000000 135 | illum 2 136 | 137 | newmtl Shell012_002_002_001.001 138 | Ns -1.960784 139 | Ka 1.000000 1.000000 1.000000 140 | Kd 1.000000 1.000000 1.000000 141 | Ks 0.003906 0.003906 0.003906 142 | Ke 0.000000 0.000000 0.000000 143 | Ni 1.000000 144 | d 1.000000 145 | illum 2 146 | map_Kd colors.png 147 | 148 | newmtl Shell_003_001_002_001.001 149 | Ns -1.960784 150 | Ka 1.000000 1.000000 1.000000 151 | Kd 0.250980 0.250980 0.250980 152 | Ks 0.003906 0.003906 0.003906 153 | Ke 0.000000 0.000000 0.000000 154 | Ni 1.000000 155 | d 1.000000 156 | illum 2 157 | map_Kd colors.png 158 | 159 | newmtl Union001_001_001_002_001.001 160 | Ns -1.960784 161 | Ka 1.000000 1.000000 1.000000 162 | Kd 0.039216 0.541176 0.780392 163 | Ks 0.003906 0.003906 0.003906 
164 | Ke 0.000000 0.000000 0.000000 165 | Ni 1.000000 166 | d 1.000000 167 | illum 2 168 | 169 | newmtl Union_001_001_002_001.001 170 | Ns -1.960784 171 | Ka 1.000000 1.000000 1.000000 172 | Kd 0.039216 0.541176 0.780392 173 | Ks 0.003906 0.003906 0.003906 174 | Ke 0.000000 0.000000 0.000000 175 | Ni 1.000000 176 | d 1.000000 177 | illum 2 178 | -------------------------------------------------------------------------------- /rmp2/urdf/three_link_planar_robot.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | -------------------------------------------------------------------------------- /rmp2/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UWRobotLearning/rmp2/c612a014f517204b38c552619a441be4b3d7b67f/rmp2/utils/__init__.py 
"""
helper functions for pybullet
"""

import pybullet as p


def add_goal(bullet_client, position, radius=0.05, color=(0.0, 1.0, 0.0, 1)):
    """Create a visual-only sphere marking a goal position.

    :param bullet_client: pybullet client in which to create the body
    :param position: xyz position of the sphere center
    :param radius: sphere radius
    :param color: rgba color of the sphere
    :return: unique body id of the created marker
    """
    collision = -1  # no collision shape: the marker is purely visual
    visual = bullet_client.createVisualShape(
        p.GEOM_SPHERE, radius=radius, rgbaColor=color)
    goal = bullet_client.createMultiBody(
        baseMass=0,
        baseCollisionShapeIndex=collision,
        baseVisualShapeIndex=visual,
        basePosition=position)
    return goal


def add_collision_goal(bullet_client, position, radius=0.05, color=(0.0, 1.0, 0.0, 1)):
    """Create an invisible goal body carrying a small collision sphere.

    NOTE(review): the collision radius is hard-coded to 0.01 and the
    `radius`/`color` arguments are unused; they are kept so the signature
    mirrors `add_goal` — confirm whether they should be honored.

    :param bullet_client: pybullet client in which to create the body
    :param position: xyz position of the sphere center
    :return: unique body id of the created goal body
    """
    collision = bullet_client.createCollisionShape(p.GEOM_SPHERE, radius=0.01)
    visual = -1  # no visual shape: the goal is invisible
    goal = bullet_client.createMultiBody(
        baseMass=0,
        baseCollisionShapeIndex=collision,
        baseVisualShapeIndex=visual,
        basePosition=position)
    return goal


def add_obstacle_ball(bullet_client, center, radius=0.1, color=(0.4, 0.4, 0.4, 1)):
    """Create a spherical obstacle with collision and visual shapes.

    Bug fix: the shapes and body were previously created through the
    module-level `p` instead of `bullet_client`, so with multiple connected
    clients the obstacle could be added to the wrong simulation. All calls
    now go through `bullet_client`, consistent with `add_goal`.

    :param bullet_client: pybullet client in which to create the body
    :param center: xyz position of the ball center
    :param radius: ball radius
    :param color: rgba color of the ball
    :return: unique body id of the created obstacle
    """
    collision = bullet_client.createCollisionShape(p.GEOM_SPHERE, radius=radius)
    visual = bullet_client.createVisualShape(
        p.GEOM_SPHERE, radius=radius, rgbaColor=color)
    obstacle = bullet_client.createMultiBody(
        baseMass=0,
        baseCollisionShapeIndex=collision,
        baseVisualShapeIndex=visual,
        basePosition=center)
    return obstacle


def add_obstacle_cylinder(bullet_client, center, radius=0.1, length=0.1, color=(0.4, 0.4, 0.4, 1)):
    """Create a cylindrical obstacle with collision and visual shapes.

    Bug fix: the shapes and body were previously created through the
    module-level `p` instead of `bullet_client` (same issue as
    `add_obstacle_ball`); all calls now go through `bullet_client`.

    :param bullet_client: pybullet client in which to create the body
    :param center: xyz position of the cylinder center
    :param radius: cylinder radius
    :param length: cylinder height (pybullet names it `height` for the
        collision shape and `length` for the visual shape)
    :param color: rgba color of the cylinder
    :return: unique body id of the created obstacle
    """
    collision = bullet_client.createCollisionShape(
        p.GEOM_CYLINDER, radius=radius, height=length)
    visual = bullet_client.createVisualShape(
        p.GEOM_CYLINDER, radius=radius, length=length, rgbaColor=color)
    obstacle = bullet_client.createMultiBody(
        baseMass=0,
        baseCollisionShapeIndex=collision,
        baseVisualShapeIndex=visual,
        basePosition=center)
    return obstacle
"""
wrapper classes for input/output of the environment
"""

from abc import ABC, abstractmethod
import tensorflow as tf


def load_env_wrapper(envid, dtype=tf.float32):
    """Return the environment wrapper instance for an environment id.

    :param envid: environment identifier string
    :param dtype: tensorflow dtype observations are cast to
    :return: an EnvWrapper subclass instance
    :raises ValueError: if envid is not one of the known ids
    """
    if envid == '3link' or envid == '3link_residual':
        return ThreeLinkWrapper(dtype=dtype)
    elif envid == '3link_rmp':
        return ThreeLinkRMPWrapper(dtype=dtype)
    elif envid == 'franka' or envid == 'franka_residual':
        return FrankaWrapper(dtype=dtype)
    elif envid == 'franka_rmp':
        return FrankaRMPWrapper(dtype=dtype)
    else:
        # bug fix: previously raised a bare ValueError with no message
        raise ValueError('unknown environment id: %s' % envid)


class EnvWrapper(ABC):
    """
    wrapper class for input/output of the environment
    """
    @abstractmethod
    def obs_to_policy_input(self, obs):
        """
        params obs: environment observation
        return policy_input: input to the policy
        """

    @abstractmethod
    def obs_to_value_input(self, obs):
        """
        params obs: environment observation
        return value_input: input to the value function
        """

    @abstractmethod
    def policy_output_to_action(self, actions):
        """
        params actions: output of the policy
        return env_actions: action to the environment
        """


class ThreeLinkWrapper(EnvWrapper):
    """Pass-through wrapper for the 3-link env: observations are only cast."""

    def __init__(self, dtype=tf.float32):
        self.dtype = dtype

    def obs_to_policy_input(self, obs):
        return tf.cast(obs, self.dtype)

    def obs_to_value_input(self, obs):
        return tf.cast(obs, self.dtype)

    @staticmethod
    def policy_output_to_action(actions):
        return actions


class ThreeLinkRMPWrapper(EnvWrapper):
    """Split 3-link observations into the dict input for the RMP policy.

    Assumed observation layout (batch, obs_dim), reading from the end:
    xd(2) | x(2) | goal(2) | obstacles(3 per obstacle) — TODO confirm
    against the env's observation builder.
    """

    def __init__(self, dtype=tf.float32):
        # dtype default unified with the other wrappers (was the string 'float32')
        self.dtype = dtype

    def obs_to_policy_input(self, obs):
        obs = tf.cast(obs, self.dtype)
        obs_dim = int(obs.shape[1])
        # presumably each obstacle contributes 5 entries to obs_dim while
        # only 3 are gathered below — verify against the env
        num_obstacles = int((obs_dim - 17) / 5)

        x = tf.gather(obs, range(obs_dim - 4, obs_dim - 2), axis=1)
        xd = tf.gather(obs, range(obs_dim - 2, obs_dim), axis=1)
        goal = tf.gather(obs, range(obs_dim - 6, obs_dim - 4), axis=1)
        obstacles = tf.gather(obs, range(obs_dim - 6 - 3 * num_obstacles, obs_dim - 6), axis=1)
        return {'x': x, 'xd': xd, 'goal': goal, 'obstacles': obstacles}

    def obs_to_value_input(self, obs):
        obs = tf.cast(obs, self.dtype)
        obs_dim = int(obs.shape[1])
        # the value function does not see the trailing 6 entries (goal/x/xd)
        return tf.gather(obs, range(0, obs_dim - 6), axis=1)

    @staticmethod
    def policy_output_to_action(actions):
        return actions


class ThreeLinkFullRMPWrapper(EnvWrapper):
    """Split full-state 3-link observations for the RMP policy.

    Assumed observation layout (batch, obs_dim):
    sin(q)(3) | cos(q)(3) | qd(3) | ... | obstacles(3 per obstacle)
    — TODO confirm against the env.
    """

    def __init__(self, dtype=tf.float32):
        # dtype default unified with the other wrappers (was the string 'float32')
        self.dtype = dtype

    def obs_to_policy_input(self, obs):
        obs = tf.cast(obs, self.dtype)
        obs_dim = int(obs.shape[1])
        # presumably each obstacle contributes 5 entries to obs_dim while
        # only 3 are gathered below — verify against the env
        num_obstacles = int((obs_dim - 11) / 5)

        sin_q = tf.gather(obs, range(0, 3), axis=1)
        cos_q = tf.gather(obs, range(3, 6), axis=1)
        q = tf.atan2(sin_q, cos_q)  # recover joint angles from sin/cos encoding
        qd = tf.gather(obs, range(6, 9), axis=1)
        obstacles = tf.gather(obs, range(obs_dim - 3 * num_obstacles, obs_dim), axis=1)
        return {'q': q, 'qd': qd, 'goal': None, 'obstacles': obstacles}

    def obs_to_value_input(self, obs):
        obs = tf.cast(obs, self.dtype)
        obs_dim = int(obs.shape[1])
        return tf.gather(obs, range(0, obs_dim - 6), axis=1)

    @staticmethod
    def policy_output_to_action(actions):
        return actions


class FrankaWrapper(EnvWrapper):
    """Pass-through wrapper for the Franka env: observations are only cast."""

    def __init__(self, dtype=tf.float32):
        self.dtype = dtype

    def obs_to_policy_input(self, obs):
        return tf.cast(obs, self.dtype)

    def obs_to_value_input(self, obs):
        return tf.cast(obs, self.dtype)

    @staticmethod
    def policy_output_to_action(actions):
        return actions


class FrankaRMPWrapper(EnvWrapper):
    """Split Franka observations into the dict input for the RMP policy.

    Assumed observation layout (batch, obs_dim), reading from the end:
    xd(3) | x(3) | goal(3) | obstacles(4 per obstacle) — TODO confirm
    against the env's observation builder.
    """

    def __init__(self, dtype=tf.float32):
        # dtype default unified with the other wrappers (was the string 'float32')
        self.dtype = dtype

    def obs_to_policy_input(self, obs):
        obs = tf.cast(obs, self.dtype)
        obs_dim = int(obs.shape[1])
        # presumably each obstacle contributes 7 entries to obs_dim while
        # only 4 are gathered below — verify against the env
        num_obstacles = int((obs_dim - 33) / 7)

        x = tf.gather(obs, range(obs_dim - 6, obs_dim - 3), axis=1)
        xd = tf.gather(obs, range(obs_dim - 3, obs_dim), axis=1)
        goal = tf.gather(obs, range(obs_dim - 9, obs_dim - 6), axis=1)
        obstacles = tf.gather(obs, range(obs_dim - 9 - 4 * num_obstacles, obs_dim - 9), axis=1)
        return {'x': x, 'xd': xd, 'goal': goal, 'obstacles': obstacles}

    def obs_to_value_input(self, obs):
        obs = tf.cast(obs, self.dtype)
        obs_dim = int(obs.shape[1])
        # the value function does not see the trailing 9 entries (goal/x/xd)
        return tf.gather(obs, range(0, obs_dim - 9), axis=1)

    @staticmethod
    def policy_output_to_action(actions):
        return actions


class FrankaFullRMPWrapper(EnvWrapper):
    """Split full-state Franka observations for the RMP policy.

    Assumed observation layout (batch, obs_dim):
    sin(q)(7) | cos(q)(7) | qd(7) | ... | obstacles(4 per obstacle)
    — TODO confirm against the env.
    """

    def __init__(self, dtype=tf.float32):
        # dtype default unified with the other wrappers (was the string 'float32')
        self.dtype = dtype

    def obs_to_policy_input(self, obs):
        obs = tf.cast(obs, self.dtype)
        obs_dim = int(obs.shape[1])
        # presumably each obstacle contributes 7 entries to obs_dim while
        # only 4 are gathered below — verify against the env
        num_obstacles = int((obs_dim - 24) / 7)

        sin_q = tf.gather(obs, range(0, 7), axis=1)
        cos_q = tf.gather(obs, range(7, 14), axis=1)
        q = tf.atan2(sin_q, cos_q)  # recover joint angles from sin/cos encoding
        qd = tf.gather(obs, range(14, 21), axis=1)
        obstacles = tf.gather(obs, range(obs_dim - 4 * num_obstacles, obs_dim), axis=1)
        return {'q': q, 'qd': qd, 'goal': None, 'obstacles': obstacles}

    def obs_to_value_input(self, obs):
        obs = tf.cast(obs, self.dtype)
        obs_dim = int(obs.shape[1])
        return tf.gather(obs, range(0, obs_dim - 9), axis=1)

    @staticmethod
    def policy_output_to_action(actions):
        return actions


"""
helper functions for sampling from torus in 2d and 3d
"""

import numpy as np


def sample_from_torus_3d(random_generator, angle_center, angle_range, major_radius, minor_radius, height):
    """Rejection-sample a point uniformly from an angular section of a 3d torus.

    :param random_generator: numpy random generator (must provide uniform/random)
    :param angle_center: center of the sampled angular section (radians)
    :param angle_range: width of the sampled angular section (radians)
    :param major_radius: distance from torus center to tube center
    :param minor_radius: tube radius
    :param height: z offset of the torus plane
    :return: np.ndarray of shape (3,) — a point inside the torus section
    """
    while True:
        theta = random_generator.uniform(
            low=angle_center - angle_range / 2,
            high=angle_center + angle_range / 2)
        psi = random_generator.uniform(low=-np.pi, high=np.pi)
        r = random_generator.uniform(low=0., high=minor_radius)
        w = random_generator.random()
        # accept proportionally to the volume element of the torus so the
        # accepted points are uniform in the solid
        if w <= (major_radius + r * np.cos(psi)) / (major_radius + minor_radius) * r / minor_radius:
            return np.array([
                (major_radius + r * np.cos(psi)) * np.cos(theta),
                (major_radius + r * np.cos(psi)) * np.sin(theta),
                height + r * np.sin(psi)
            ])


def sample_from_torus_2d(random_generator, angle_center, angle_range, major_radius, minor_radius):
    """Rejection-sample a point uniformly from an angular section of a 2d annulus.

    The annulus spans radii [major_radius - minor_radius,
    major_radius + minor_radius].

    Bug fix: the radial proposal's upper bound was
    `minor_radius + minor_radius` — an evident typo for
    `major_radius + minor_radius`, which the `r`-proportional acceptance
    test below (normalized by major_radius + minor_radius) also implies.

    :param random_generator: numpy random generator (must provide uniform/random)
    :param angle_center: center of the sampled angular section (radians)
    :param angle_range: width of the sampled angular section (radians)
    :param major_radius: mid-radius of the annulus
    :param minor_radius: half-width of the annulus
    :return: np.ndarray of shape (2,) — a point inside the annulus section
    """
    while True:
        theta = random_generator.uniform(
            low=angle_center - angle_range / 2,
            high=angle_center + angle_range / 2)
        r = random_generator.uniform(
            low=major_radius - minor_radius,
            high=major_radius + minor_radius)
        w = random_generator.random()
        # accept proportionally to the area element (∝ r) so the accepted
        # points are uniform over the annulus section
        if w <= r / (major_radius + minor_radius):
            return np.array([
                r * np.cos(theta),
                r * np.sin(theta),
            ])
-------------------------------------------------------------------------------- 1 | # The first version was licensed as "Original Source License"(see below). 2 | # Several enhancements and at UW Robot Learning Lab 3 | # 4 | # Original Source License: 5 | # 6 | # Copyright (c) 2019 Georgia Tech Robot Learning Lab 7 | # Licensed under the MIT License. 8 | 9 | """ 10 | configs for ploting 11 | """ 12 | 13 | from matplotlib import cm 14 | from itertools import chain 15 | 16 | SET2COLORS = cm.get_cmap('Set2').colors 17 | SET2 = {'darkgreen': SET2COLORS[0], 18 | 'orange': SET2COLORS[1], 19 | 'blue': SET2COLORS[2], 20 | 'pink': SET2COLORS[3], 21 | 'lightgreen': SET2COLORS[4], 22 | 'gold': SET2COLORS[5], 23 | 'brown': SET2COLORS[6], 24 | 'grey': SET2COLORS[7], 25 | } 26 | 27 | SET1COLORS = cm.get_cmap('Set1').colors 28 | SET1 = { 29 | 'red': SET1COLORS[0], 30 | 'blue': SET1COLORS[1], 31 | 'green': SET1COLORS[2], 32 | 'purple': SET1COLORS[3], 33 | 'orange': SET1COLORS[4], 34 | 'yellow': SET1COLORS[5], 35 | 'brown': SET1COLORS[6], 36 | 'pink': SET1COLORS[7], 37 | 'grey': SET1COLORS[8] 38 | } 39 | code_configs = { 40 | 'bc-nn': (r'\textsc{BC+NN}', SET1['blue']), 41 | 'bc-rmp': (r'\textsc{BC+RMP}', SET1['purple']), 42 | 'code-nn': (r'\textsc{CODE+NN}', SET1['green']), 43 | 'code-rmp': (r'\textsc{CODE+RMP}', SET2['lightgreen']), 44 | 'order': [ 45 | 'bc-nn', 'bc-rmp', 'code-nn', 'code-rmp'] 46 | } 47 | 48 | rmp2_configs = { 49 | 'rmp': (r'\textsc{RMP}', SET2['lightgreen']), 50 | 'rmp-obs-feat': (r'\textsc{RMP-RESIDUAL}', SET1['blue']), 51 | 'nn': (r'\textsc{NN}', 'gray'), # SET1['grey']), 52 | 'nn-residual': (r'\textsc{NN-RESIDUAL}', 'indianred'), # SET1['red']), 53 | 'order': [ 54 | 'rmp-obs-feat', 'rmp', 'nn-residual', 'nn'] 55 | } 56 | 57 | gtc_configs = { 58 | 'rmp-obs-feat': (r'\textsc{STRUCTURED}', [0.4627451, 0.7254902, 0.]), 59 | 'nn': (r'\textsc{NN}', 'gray'), # SET1['grey']), 60 | 'order': [ 61 | 'rmp-obs-feat', 'nn'] 62 | } 63 | 64 | class Configs(object): 65 | def 
__init__(self, style=None, colormap=None): 66 | if not style: 67 | self.configs = None 68 | if colormap is None: 69 | c1 = iter(cm.get_cmap('Set1').colors) 70 | c2 = iter(cm.get_cmap('Set2').colors) 71 | c3 = iter(cm.get_cmap('Set3').colors) 72 | self.colors = chain(c1, c2, c3) 73 | else: 74 | self.colors = iter(cm.get_cmap(colormap).colors) 75 | else: 76 | self.configs = globals()[style + '_configs'] 77 | for exp_name in self.configs['order']: 78 | assert exp_name in self.configs, 'Unknown exp: {}'.format(exp_name) 79 | 80 | def color(self, exp_name): 81 | if self.configs is None: 82 | color = next(self.colors) 83 | else: 84 | color = self.configs[exp_name][1] 85 | return color 86 | 87 | def label(self, exp_name): 88 | if self.configs is None: 89 | return exp_name 90 | return self.configs[exp_name][0] 91 | 92 | def sort_dirs(self, dirs): 93 | if self.configs is None: 94 | return dirs 95 | 96 | def custom_key(exp_name): 97 | if exp_name in self.configs['order']: 98 | return self.configs['order'].index(exp_name) 99 | else: 100 | return 100 101 | return sorted(dirs, key=custom_key) 102 | -------------------------------------------------------------------------------- /rmp2/utils/python_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | python helper functions 3 | """ 4 | 5 | from contextlib import contextmanager 6 | import time 7 | 8 | @contextmanager 9 | def timing(msg, verbose=True): 10 | if verbose: 11 | # print("\t---------------------------------") 12 | print("\t[Timer] %s started..." 
% (msg)) 13 | tstart = time.time() 14 | yield 15 | print("\t[Timer] %s done in %.3f seconds" % (msg, time.time() - tstart)) 16 | print("\t---------------------------------") 17 | else: 18 | yield 19 | 20 | def merge_dicts(original, new_dict): 21 | if new_dict is not None: 22 | updated_dict = original.copy() 23 | updated_dict.update(new_dict) 24 | else: 25 | updated_dict = original 26 | return updated_dict 27 | -------------------------------------------------------------------------------- /rmp2/utils/rllib_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | registering the customized envs and models for rllib 3 | """ 4 | 5 | from ray.rllib.models import ModelCatalog 6 | from ray.tune.registry import register_env 7 | 8 | from rmp2.envs import * 9 | from rmp2.policies.gaussian_policy import DiagGaussianModel 10 | 11 | def register_envs_and_models(): 12 | def franka_env_creator(env_config): 13 | return FrankaEnv(env_config) 14 | register_env("franka", franka_env_creator) 15 | 16 | def franka_residual_env_creator(env_config): 17 | return FrankaResidualEnv(env_config) 18 | register_env("franka_residual", franka_residual_env_creator) 19 | 20 | def franka_rmp_env_creator(env_config): 21 | return FrankaResidualRMPEnv(env_config) 22 | register_env("franka_rmp", franka_rmp_env_creator) 23 | 24 | def three_link_env_creator(env_config): 25 | return ThreeLinkEnv(env_config) 26 | register_env("3link", three_link_env_creator) 27 | 28 | def three_link_residual_env_creator(env_config): 29 | return ThreeLinkResidualEnv(env_config) 30 | register_env("3link_residual", three_link_residual_env_creator) 31 | 32 | def three_link_rmp_env_creator(env_config): 33 | return ThreeLinkResidualRMPEnv(env_config) 34 | register_env("3link_rmp", three_link_rmp_env_creator) 35 | 36 | ModelCatalog.register_custom_model("diag_gaussian_model", DiagGaussianModel) -------------------------------------------------------------------------------- 
# /rmp2/utils/robot_config_utils.py:
# --------------------------------------------------------------------------------
"""
helper functions for loading robot urdf and rmp configurations
"""

import os

def load_robot_config(robot_name=None, config_path=None):
    """Load the rmp configuration for a robot.

    Args:
        robot_name: 'franka' or '3link'; may be None if config_path is given,
            in which case the name is read from the config file.
        config_path: optional explicit path to a yaml config; defaults to the
            packaged config for robot_name.
    Returns:
        (robot_name, config) tuple, where config is the parsed yaml dict.
    Raises:
        ValueError: if neither a config_path nor a known robot_name is given.
    """
    if config_path is None:
        if robot_name == 'franka':
            config_path = os.path.join(os.path.dirname(__file__), '..', 'configs', 'franka_config.yaml')
        elif robot_name == '3link':
            config_path = os.path.join(os.path.dirname(__file__), '..', 'configs', '3link_config.yaml')
        else:
            raise ValueError('unknown robot name: {!r} (expected franka or 3link)'.format(robot_name))

    # Imported lazily so the rest of this module (urdf paths, eef uids)
    # remains usable when PyYAML is not installed.
    import yaml

    with open(config_path) as f:
        config = yaml.safe_load(f)
    if robot_name is not None:
        assert robot_name == config['robot_name'], \
            'config file is for robot {!r}, not {!r}'.format(config['robot_name'], robot_name)
    else:
        robot_name = config['robot_name']

    return robot_name, config


def get_robot_urdf_path(robot_name):
    """Return the path to the packaged urdf file for the given robot.

    Raises:
        ValueError: for robot names other than 'franka' and '3link'.
    """
    if robot_name == 'franka':
        urdf_path = os.path.join(os.path.dirname(__file__), '..', 'urdf', 'panda.urdf')
    elif robot_name == '3link':
        urdf_path = os.path.join(os.path.dirname(__file__), '..', 'urdf', 'three_link_planar_robot.urdf')
    else:
        raise ValueError('unknown robot name: {!r} (expected franka or 3link)'.format(robot_name))

    return urdf_path

def get_robot_eef_uid(robot_name):
    """Return the pybullet link uid of the robot's end effector.

    Raises:
        ValueError: for robot names other than 'franka' and '3link'.
    """
    if robot_name == "franka":
        eef_uid = 14
    elif robot_name == "3link":
        eef_uid = 6
    else:
        raise ValueError('unknown robot name: {!r} (expected franka or 3link)'.format(robot_name))
    return eef_uid
# --------------------------------------------------------------------------------
# /rmp2/utils/tf_transform_utils.py:
# --------------------------------------------------------------------------------
# The first version was licensed as "Original Source License" (see below).
# Several enhancements were made at the UW Robot Learning Lab.
#
# Original Source License:
#
# Copyright (c) 2018 mahaarbo
# Licensed under the MIT License.
"""
rigid body transformation in tensorflow
"""

import tensorflow as tf

def rotation_rpy(rpy):
    """Returns a rotation matrix from roll pitch yaw. ZYX convention."""
    cr = tf.cos(rpy[0])
    sr = tf.sin(rpy[0])
    cp = tf.cos(rpy[1])
    sp = tf.sin(rpy[1])
    cy = tf.cos(rpy[2])
    sy = tf.sin(rpy[2])

    # R = Rz(yaw) @ Ry(pitch) @ Rx(roll), written out element-wise.
    return tf.convert_to_tensor([[cy*cp, cy*sp*sr - sy*cr, cy*sp*cr + sy*sr],
                                 [sy*cp, sy*sp*sr + cy*cr, sy*sp*cr - cy*sr],
                                 [ -sp, cp*sr, cp*cr]])


def T_rpy(displacement, rpy):
    """Homogeneous transformation matrix with roll pitch yaw."""
    # Top 3x4: [R | t]; bottom row: [0 0 0 1].
    t12 = tf.concat([rotation_rpy(rpy), tf.expand_dims(displacement, -1)], axis=1)
    t3 = tf.constant([0., 0., 0., 1.], dtype=displacement.dtype)
    T = tf.concat([t12, tf.expand_dims(t3, 0)], axis=0)
    return T


def T_prismatic(xyz, rpy, axis, qi):
    """Batched homogeneous transform of a prismatic joint.

    Fixed origin offset (xyz, rpy) followed by a translation of qi along
    ``axis`` (expressed in the rotated frame). qi is batched; returns a
    (batch, 4, 4) tensor.
    """
    # Origin rotation from RPY ZYX convention
    cr = tf.cos(rpy[0])
    sr = tf.sin(rpy[0])
    cp = tf.cos(rpy[1])
    sp = tf.sin(rpy[1])
    cy = tf.cos(rpy[2])
    sy = tf.sin(rpy[2])
    r00 = cy*cp
    r01 = cy*sp*sr - sy*cr
    r02 = cy*sp*cr + sy*sr
    r10 = sy*cp
    r11 = sy*sp*sr + cy*cr
    r12 = sy*sp*cr - cy*sr
    r20 = -sp
    r21 = cp*sr
    r22 = cp*cr
    # Translation p = R @ (axis * qi), expanded per component.
    p0 = r00*axis[0]*qi + r01*axis[1]*qi + r02*axis[2]*qi
    p1 = r10*axis[0]*qi + r11*axis[1]*qi + r12*axis[2]*qi
    p2 = r20*axis[0]*qi + r21*axis[1]*qi + r22*axis[2]*qi

    # Homogeneous transformation matrix
    # ones_like/zeros_like broadcast the fixed rotation across the batch.
    t00 = tf.ones_like(qi) * r00
    t01 = tf.ones_like(qi) * r01
    t02 = tf.ones_like(qi) * r02
    t03 = xyz[0] + p0

    t10 = tf.ones_like(qi) * r10
    t11 = tf.ones_like(qi) * r11
    t12 = tf.ones_like(qi) * r12
    t13 = xyz[1] + p1

    t20 = tf.ones_like(qi) * r20
    t21 = tf.ones_like(qi) * r21
    t22 = tf.ones_like(qi) * r22
    t23 = xyz[2] + p2

    t30 = tf.zeros_like(qi)
    t31 = tf.zeros_like(qi)
    t32 = tf.zeros_like(qi)
    t33 = tf.ones_like(qi)

    T = tf.stack([
        t00, t01, t02, t03,
        t10, t11, t12, t13,
        t20, t21, t22, t23,
        t30, t31, t32, t33], axis=1)
    T = tf.reshape(T, (-1, 4, 4))
    return T


def T_revolute(xyz, rpy, axis, qi):
    """Batched homogeneous transform of a revolute joint.

    Fixed origin offset (xyz, rpy) followed by a rotation of qi about
    ``axis`` (axis-angle / Rodrigues form). qi is batched; returns a
    (batch, 4, 4) tensor.
    """
    # Origin rotation from RPY ZYX convention
    cr = tf.cos(rpy[0])
    sr = tf.sin(rpy[0])
    cp = tf.cos(rpy[1])
    sp = tf.sin(rpy[1])
    cy = tf.cos(rpy[2])
    sy = tf.sin(rpy[2])
    r00 = cy*cp
    r01 = cy*sp*sr - sy*cr
    r02 = cy*sp*cr + sy*sr
    r10 = sy*cp
    r11 = sy*sp*sr + cy*cr
    r12 = sy*sp*cr - cy*sr
    r20 = -sp
    r21 = cp*sr
    r22 = cp*cr

    # joint rotation from skew sym axis angle
    cqi = tf.cos(qi)
    sqi = tf.sin(qi)
    s00 = (1 - cqi)*axis[0]*axis[0] + cqi
    s11 = (1 - cqi)*axis[1]*axis[1] + cqi
    s22 = (1 - cqi)*axis[2]*axis[2] + cqi
    s01 = (1 - cqi)*axis[0]*axis[1] - axis[2]*sqi
    s10 = (1 - cqi)*axis[0]*axis[1] + axis[2]*sqi
    s12 = (1 - cqi)*axis[1]*axis[2] - axis[0]*sqi
    s21 = (1 - cqi)*axis[1]*axis[2] + axis[0]*sqi
    s20 = (1 - cqi)*axis[0]*axis[2] - axis[1]*sqi
    s02 = (1 - cqi)*axis[0]*axis[2] + axis[1]*sqi

    # Homogeneous transformation matrix
    # Rotation block is R_origin @ S(axis, qi), expanded element-wise.
    t00 = r00*s00 + r01*s10 + r02*s20
    t10 = r10*s00 + r11*s10 + r12*s20
    t20 = r20*s00 + r21*s10 + r22*s20
    t30 = tf.zeros_like(qi)

    t01 = r00*s01 + r01*s11 + r02*s21
    t11 = r10*s01 + r11*s11 + r12*s21
    t21 = r20*s01 + r21*s11 + r22*s21
    t31 = tf.zeros_like(qi)

    t02 = r00*s02 + r01*s12 + r02*s22
    t12 = r10*s02 + r11*s12 + r12*s22
    t22 = r20*s02 + r21*s12 + r22*s22
    t32 = tf.zeros_like(qi)

    # Revolute joints do not translate: the offset is the fixed xyz.
    t03 = tf.ones_like(qi) * xyz[0]
    t13 = tf.ones_like(qi) * xyz[1]
    t23 = tf.ones_like(qi) * xyz[2]
    t33 = tf.ones_like(qi)

    T = tf.stack([
        t00, t01, t02, t03,
        t10, t11, t12, t13,
        t20, t21, t22, t23,
        t30, t31, t32, t33
    ], axis=1)
    T = tf.reshape(T, (-1, 4, 4))

    return T
--------------------------------------------------------------------------------
/rmp2/utils/tf_utils.py:
--------------------------------------------------------------------------------
"""
helper functions in tensorflow
"""

import tensorflow as tf
import numpy as np
from collections.abc import Iterable, Sized

def ip(x, y, reduce='sum', axis=None):
    """
    inner product between vectors or list(s) of tensors
    """
    assert reduce == 'sum' or reduce is None

    # if both inputs are tensors, return inner product
    # (note: `reduce` is ignored on this branch — there is nothing to reduce over)
    if isinstance(x, tf.Tensor) and isinstance(y, tf.Tensor):
        assert x.shape == y.shape
        return tf.reduce_sum(x * y, axis=axis)

    # if one is tensor, the other is a list of tensors, return the sum (reduction=sum)
    # or a list (reduction=None) of inner products
    elif isinstance(x, tf.Tensor) and not isinstance(y, tf.Tensor):
        assert isinstance(y, Iterable)
        if reduce == 'sum':
            return sum(tf.reduce_sum(x * yy, axis=axis) for yy in y)
        elif reduce is None:
            return [tf.reduce_sum(x * yy, axis=axis) for yy in y]
    elif not isinstance(x, tf.Tensor) and isinstance(y, tf.Tensor):
        assert isinstance(x, Iterable)
        if reduce == 'sum':
            return sum(tf.reduce_sum(xx * y, axis=axis) for xx in x)
        elif reduce is None:
            return [tf.reduce_sum(xx * y, axis=axis) for xx in x]

    # if both are lists of tensors, return the sum (reduction=sum)
    # or a list (reduction=None) of inner products
    else:
        assert isinstance(x, Iterable) and isinstance(y, Iterable)
        if isinstance(x, Sized) and isinstance(y, Sized):
            assert len(x) == len(y)
        if reduce == 'sum':
            return sum(tf.reduce_sum(xx * yy, axis=axis) for (xx, yy) in zip(x, y))
        elif reduce is None:
            return [tf.reduce_sum(xx * yy, axis=axis) for (xx, yy) in zip(x, y)]


def ipm(x, y, m, reduce='sum'):
    """
    vector-matrix-vector product x^T M y for tensors or lists of tensors
    """
    assert reduce == 'sum' or reduce is None

    # if all inputs are tensors, return vector-matrix-vector product
    if isinstance(x, tf.Tensor) and isinstance(y, tf.Tensor) and isinstance(m, tf.Tensor):
        return tf.einsum('bi,bij,bj->b', x, m, y)
    # if all inputs are lists of tensors, return the sum (reduction=sum)
    # or a list (reduction=None) of vector-matrix-vector product
    else:
        assert isinstance(x, Iterable)
        assert isinstance(y, Iterable)
        assert isinstance(m, Iterable)
        assert len(x) == len(y) == len(m)
        if reduce == 'sum':
            return sum(tf.reduce_sum(tf.einsum('bi,bij,bj->b', xx, mm, yy)) for (xx, mm, yy) in zip(x, m, y))
        elif reduce is None:
            return [tf.einsum('bi,bij,bj->b', xx, mm, yy) for (xx, mm, yy) in zip(x, m, y)]


def bmm(x, y):
    """
    batch matrix multiplication
    """
    return tf.matmul(x, y)


def pdist2(x, y, epsilon=1e-4):
    """
    pairwise distance between x and y in batch
    """
    # x: (batch, n, d), y: (batch, m, d) — grounded by the einsum patterns below.
    _, n, d = x.shape
    m = y.shape[1]
    dtype = x.dtype
    epsilon = tf.convert_to_tensor(epsilon, dtype=dtype)
    # ||x||^2 + ||y||^2 - 2 x.y, with epsilon inside the sqrt
    # (presumably for gradient stability at zero distance — the -sqrt(epsilon)
    # term makes the result 0 when x == y).
    return tf.sqrt(
        tf.einsum('bij, jk->bik', x ** 2, tf.ones((d, m), dtype=dtype)) +
        tf.einsum('ij, bkj->bik', tf.ones((n, d), dtype=dtype), y ** 2) -
        2 * tf.einsum('bij, bkj->bik', x, y) + epsilon) - tf.sqrt(epsilon)


def solve(A, b):
    """
    solution to linear equation Ax=b in batch
    """
    return tf.squeeze(tf.linalg.solve(A, tf.expand_dims(b, -1)), -1)


def gradient(gradient_tape, target, sources,
             output_gradients=None,
             unconnected_gradients=tf.UnconnectedGradients.ZERO):
    """
    compute the gradient with default value of unconnected_gradients=0
    """
    assert isinstance(gradient_tape, tf.GradientTape)
    return gradient_tape.gradient(
        target, sources,
        output_gradients=output_gradients,
        unconnected_gradients=unconnected_gradients
    )


def jacobian(gradient_tape, target, sources,
             unconnected_gradients=tf.UnconnectedGradients.ZERO,
             parallel_iterations=None,
             experimental_use_pfor=True):
    """
    compute the jacobian with default value of unconnected_gradients=0
    """
    assert isinstance(gradient_tape, tf.GradientTape)
    return gradient_tape.jacobian(
        target, sources,
        unconnected_gradients=unconnected_gradients,
        parallel_iterations=parallel_iterations,
        experimental_use_pfor=experimental_use_pfor
    )


def batch_jacobian(gradient_tape, target, source,
                   unconnected_gradients=tf.UnconnectedGradients.ZERO,
                   parallel_iterations=None,
                   experimental_use_pfor=True):
    """
    compute the batch jacobian with default value of unconnected_gradients=0
    """
    assert isinstance(gradient_tape, tf.GradientTape)
    return gradient_tape.batch_jacobian(target, source,
        unconnected_gradients=unconnected_gradients,
        parallel_iterations=parallel_iterations,
        experimental_use_pfor=experimental_use_pfor)


class MLP(tf.Module):
    # Multi-layer perceptron with configurable hidden sizes, activation and
    # variance-scaling initialization (separate scales for hidden and output
    # layers).
    def __init__(self, x_shape, y_shape, hidden_units=(), activation='tanh',
                 hidden_layer_init_scale=1.0, output_layer_init_scale=1.0, init_distribution='uniform',
                 bias_initializer='zeros', dtype=tf.float32, name='mlp'):

        super(MLP, self).__init__(name=name)

        self.layers = []
        in_dim = x_shape
        with self.name_scope:
            for h in hidden_units:
                kernel_initializer = tf.keras.initializers.VarianceScaling(
                    mode='fan_avg',
                    distribution=init_distribution,
                    scale=hidden_layer_init_scale**2
                )
                layer = tf.keras.layers.Dense(
                    h, activation=activation,
                    kernel_initializer=kernel_initializer, bias_initializer=bias_initializer,
                    dtype=dtype)
                # NOTE(review): build() is called with a plain int input dim —
                # confirm this matches the Dense.build signature for the pinned
                # tensorflow version.
                layer.build(in_dim)
                self.layers.append(layer)
                in_dim = h

            # Output layer has no activation (linear).
            kernel_initializer = tf.keras.initializers.VarianceScaling(
                mode='fan_avg',
                distribution=init_distribution,
                scale=output_layer_init_scale**2
            )
            layer = tf.keras.layers.Dense(
                y_shape,
                kernel_initializer=kernel_initializer, bias_initializer=bias_initializer,
                dtype=dtype)
            layer.build(in_dim)
            self.layers.append(layer)

        self.units, self.activation = hidden_units, activation

    def __call__(self, x):
        # Plain feed-forward pass through all layers in order.
        for layer in self.layers:
            x = layer(x)
        return x
--------------------------------------------------------------------------------
/startup.sh:
--------------------------------------------------------------------------------
conda activate rmp2rl
export PYTHONPATH=$PYTHONPATH:$PWD
--------------------------------------------------------------------------------