├── .gitignore ├── LICENSE.md ├── README.md ├── __init__.py ├── copy_to_rllib.py ├── copy_to_rllib ├── agents │ └── dqn │ │ └── dqn.py ├── evaluation │ └── metrics.py └── optimizers │ ├── async_replay_optimizer.py │ └── replay_buffer.py ├── devices ├── __init__.py ├── action.py ├── callbacks.py ├── device.py ├── devices │ ├── __init__.py │ ├── nothing.py │ ├── pybulletgui.py │ ├── spacemouse.py │ └── xboxcontroller.py ├── pconnect.py ├── pdisconnect.py └── pupdate.py ├── envs ├── __init__.py ├── robot_real_example.py ├── robot_sim_panda.py ├── robot_sim_robotless.py ├── task.py ├── task_real.py ├── task_sim.py └── urdf │ ├── panda_peg_in_hole │ ├── insertion_box.urdf │ ├── panda_hand.urdf │ ├── panda_peg.urdf │ └── stl │ │ ├── collision │ │ ├── finger.stl │ │ ├── hand.stl │ │ ├── insertion_box.stl │ │ ├── link0.stl │ │ ├── link1.stl │ │ ├── link2.stl │ │ ├── link3.stl │ │ ├── link4.stl │ │ ├── link5.stl │ │ ├── link6.stl │ │ ├── link7.stl │ │ └── peg.stl │ │ └── visual │ │ ├── finger.dae │ │ ├── hand.dae │ │ ├── insertion_box.stl │ │ ├── link0.dae │ │ ├── link1.dae │ │ ├── link2.dae │ │ ├── link3.dae │ │ ├── link4.dae │ │ ├── link5.dae │ │ ├── link6.dae │ │ ├── link7.dae │ │ └── peg.stl │ └── robotless_lap_joint │ ├── stl │ ├── task_lap_90deg_0mm_1.stl │ ├── task_lap_90deg_0mm_2.stl │ ├── task_lap_90deg_0mm_3.stl │ ├── task_lap_90deg_0mm_visual.stl │ ├── task_lap_90deg_1mm_1.stl │ ├── task_lap_90deg_1mm_2.stl │ ├── task_lap_90deg_1mm_3.stl │ ├── task_lap_90deg_1mm_visual.stl │ ├── task_lap_90deg_2mm_1.stl │ ├── task_lap_90deg_2mm_2.stl │ ├── task_lap_90deg_2mm_3.stl │ ├── task_lap_90deg_2mm_visual.stl │ ├── task_lap_90deg_3mm_1.stl │ ├── task_lap_90deg_3mm_2.stl │ ├── task_lap_90deg_3mm_3.stl │ ├── task_lap_90deg_3mm_visual.stl │ ├── tool_gripper_collision_1.stl │ ├── tool_gripper_collision_2.stl │ ├── tool_gripper_collision_3.stl │ ├── tool_gripper_collision_4.stl │ ├── tool_gripper_collision_5.stl │ ├── tool_gripper_collision_6.stl │ ├── tool_gripper_collision_7.stl │ ├── tool_gripper_collision_8.stl │ ├── tool_gripper_visual.stl │ ├── tool_lap_90deg_0mm_collision_1.stl │ ├── tool_lap_90deg_0mm_collision_2.stl │ ├── tool_lap_90deg_0mm_collision_3.stl │ ├── tool_lap_90deg_0mm_visual.stl │ ├── tool_sensor_collision_1.stl │ ├── tool_sensor_collision_2.stl │ ├── tool_sensor_collision_3.stl │ └── tool_sensor_visual.stl │ ├── task_lap_90deg.urdf │ └── tool.urdf ├── envs_launcher.py ├── hyper_parameters └── assembly_apex_ddpg.yaml ├── rollout.py ├── run.py ├── train.py └── utilities.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | .DS_Store 4 | .idea/ 5 | _ignore/ 6 | human_demo_data/ -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Autodesk Research, Autodesk Inc. 
Copyright (c) 2020 Gramazio Kohler Research, ETH Zürich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Autodesk Deep Reinforcement Learning for Robotic Assembly 2 | 3 | Here we provide a sim-to-real RL training and testing environment for robotic assembly, together with a modification of APEX-DDPG that adds the option of recording and using human demonstrations. We include two simulation (pybullet) examples: a Franka Panda robot performing a peg-in-hole task, and a robot-less end-effector performing a lap-joint task. We also include a template for connecting your real robot when rolling out a successfully learned policy. 4 | 5 | ## Installation 6 | 7 | This repository is tested against Ray 0.7.5, and the instructions below assume that version. Feel free to try later versions of Ray and modify the code accordingly.
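Before applying the rllib patch in step 6 below, it can be worth confirming that the pinned Ray version is the one actually active in your environment. A minimal check (nothing here is specific to this repository; `ray.__version__` is a standard attribute):

```python
# Confirm that the Ray version in the active environment matches the one
# this repository was tested against (0.7.5) before patching rllib.
import ray

print(ray.__version__)  # expected: 0.7.5
```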
8 | 9 | 1. Create a conda environment with Python 3.6 and install Ray 0.7.5 (https://pypi.org/project/ray/): `$ pip install ray==0.7.5` 10 | 2. Install the following dependencies: 11 | * [pybullet](https://pypi.org/project/pybullet/) 12 | * [tensorflow](https://pypi.org/project/tensorflow/) 13 | * [gym](https://pypi.org/project/gym/) 14 | * [opencv-python](https://pypi.org/project/opencv-python/) 15 | * [getch](https://pypi.org/project/getch/) 16 | * [pygame](https://pypi.org/project/pygame/) 17 | * [transforms3d](https://pypi.org/project/transforms3d/) 18 | 19 | ``` 20 | $ pip install pybullet==2.2.6 tensorflow==1.10.0 gym opencv-python getch pygame transforms3d 21 | ``` 22 | 3. Download the Ray source code from https://github.com/ray-project/ray/releases/tag/ray-0.7.5 and keep the rllib folder in your local working directory. 23 | 4. In the ray folder, find python/ray/setup-dev.py and run `$ python setup-dev.py` to link to the local rllib. 24 | 5. Clone this repository inside the rllib folder. 25 | 6. Run `$ python copy_to_rllib.py` to install the patch. 26 | 27 | ## Run 28 | 29 | Configure the parameters: 30 | - Environment parameters in `envs_launcher.py` 31 | - Training hyper-parameters in the `hyper_parameters/*.yaml` files. 32 | 33 | Test and visualize the simulation environment with the default input device: 34 | ``` 35 | python run.py 36 | ``` 37 | 38 | Provide a demonstration with the Xbox controller (see the note at the end of this file for inspecting the recorded demo data): 39 | 40 | ``` 41 | python run.py --input-type xbc --save-demo-data=True --demo-data-path=human_demo_data/ 42 | ``` 43 | 44 | Train a model: 45 | 46 | ``` 47 | python train.py -f hyper_parameters/*.yaml 48 | ``` 49 | 50 | Roll out a model: 51 | 52 | ``` 53 | python rollout.py <path-to-results>/checkpoint-<N>/checkpoint-<N> 54 | ``` 55 | 56 | ## Notes 57 | 58 | - All code is written in [Python 3.6](https://www.python.org/). 59 | - All code was tested on macOS, Windows, and Ubuntu. 60 | - For licensing information see [LICENSE](LICENSE.md).
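For reference, the demonstration files written by `run.py --save-demo-data=True` are consumed by the patched replay code (`copy_to_rllib/optimizers/async_replay_optimizer.py`), which unpickles each file and treats it as a sequence of `(obs, action, reward, new_obs, done)` transitions. A minimal sketch for inspecting one recorded file follows; the exact file name `human_demo_data/0` is an assumption, so use whatever `run.py` actually wrote to your `--demo-data-path`:

```python
# Inspect one recorded human demonstration file (a sketch, not part of the repo).
# Assumption: the file is a pickled sequence of 5-tuples
# (obs, action, reward, new_obs, done), which is how load_human_demo() and
# PrioritizedReplayBuffer.add() consume it.
import pickle

demo_path = "human_demo_data/0"  # hypothetical file name under --demo-data-path
with open(demo_path, "rb") as f:
    transitions = pickle.load(f)

print("number of transitions:", len(transitions))
obs, action, reward, new_obs, done = transitions[0]
print("first action:", action)
print("first reward:", reward, "done:", done)
```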
-------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/__init__.py -------------------------------------------------------------------------------- /copy_to_rllib.py: -------------------------------------------------------------------------------- 1 | # rllib patch 2 | 3 | from shutil import copyfile 4 | import os 5 | 6 | # show all the stats in terminal, not just one third 7 | dqn_src = "copy_to_rllib/agents/dqn/dqn.py" 8 | dqn_dst = "../agents/dqn/dqn.py" 9 | copyfile(dqn_src, dqn_dst) 10 | 11 | # human demonstrations & dynamic experience replay 12 | async_src = "copy_to_rllib/optimizers/async_replay_optimizer.py" 13 | async_dst = "../optimizers/async_replay_optimizer.py" 14 | copyfile(async_src, async_dst) 15 | buffer_src = "copy_to_rllib/optimizers/replay_buffer.py" 16 | buffer_dst = "../optimizers/replay_buffer.py" 17 | copyfile(buffer_src, buffer_dst) 18 | 19 | # calculate the custom metrics for one transition, 20 | # not the historic transitions designed by Ray 21 | metrics_src = "copy_to_rllib/evaluation/metrics.py" 22 | metrics_dst = "../evaluation/metrics.py" 23 | copyfile(metrics_src, metrics_dst) 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /copy_to_rllib/agents/dqn/dqn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import logging 6 | 7 | from ray.rllib.agents.trainer import with_common_config 8 | from ray.rllib.agents.trainer_template import build_trainer 9 | from ray.rllib.agents.dqn.dqn_policy import DQNTFPolicy 10 | from ray.rllib.agents.dqn.simple_q_policy import SimpleQPolicy 11 | from ray.rllib.optimizers import SyncReplayOptimizer 12 | from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID 13 | from ray.rllib.utils.schedules import ConstantSchedule, LinearSchedule 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | # yapf: disable 18 | # __sphinx_doc_begin__ 19 | DEFAULT_CONFIG = with_common_config({ 20 | # === Model === 21 | # Number of atoms for representing the distribution of return. When 22 | # this is greater than 1, distributional Q-learning is used. 23 | # the discrete supports are bounded by v_min and v_max 24 | "num_atoms": 1, 25 | "v_min": -10.0, 26 | "v_max": 10.0, 27 | # Whether to use noisy network 28 | "noisy": False, 29 | # control the initial value of noisy nets 30 | "sigma0": 0.5, 31 | # Whether to use dueling dqn 32 | "dueling": True, 33 | # Whether to use double dqn 34 | "double_q": True, 35 | # Postprocess model outputs with these hidden layers to compute the 36 | # state and action values. See also the model config in catalog.py. 37 | "hiddens": [256], 38 | # N-step Q learning 39 | "n_step": 1, 40 | 41 | # === Exploration === 42 | # Max num timesteps for annealing schedules. Exploration is annealed from 43 | # 1.0 to exploration_fraction over this number of timesteps scaled by 44 | # exploration_fraction 45 | "schedule_max_timesteps": 100000, 46 | # Minimum env steps to optimize for per train call. This value does 47 | # not affect learning, only the length of iterations. 
48 | "timesteps_per_iteration": 1000, 49 | # Fraction of entire training period over which the exploration rate is 50 | # annealed 51 | "exploration_fraction": 0.1, 52 | # Final value of random action probability 53 | "exploration_final_eps": 0.02, 54 | # Update the target network every `target_network_update_freq` steps. 55 | "target_network_update_freq": 500, 56 | # Use softmax for sampling actions. Required for off policy estimation. 57 | "soft_q": False, 58 | # Softmax temperature. Q values are divided by this value prior to softmax. 59 | # Softmax approaches argmax as the temperature drops to zero. 60 | "softmax_temp": 1.0, 61 | # If True parameter space noise will be used for exploration 62 | # See https://blog.openai.com/better-exploration-with-parameter-noise/ 63 | "parameter_noise": False, 64 | # Extra configuration that disables exploration. 65 | "evaluation_config": { 66 | "exploration_fraction": 0, 67 | "exploration_final_eps": 0, 68 | }, 69 | 70 | # === Replay buffer === 71 | # Size of the replay buffer. Note that if async_updates is set, then 72 | # each worker will have a replay buffer of this size. 73 | "buffer_size": 50000, 74 | # If True prioritized replay buffer will be used. 75 | "prioritized_replay": True, 76 | # Alpha parameter for prioritized replay buffer. 77 | "prioritized_replay_alpha": 0.6, 78 | # Beta parameter for sampling from prioritized replay buffer. 79 | "prioritized_replay_beta": 0.4, 80 | # Fraction of entire training period over which the beta parameter is 81 | # annealed 82 | "beta_annealing_fraction": 0.2, 83 | # Final value of beta 84 | "final_prioritized_replay_beta": 0.4, 85 | # Epsilon to add to the TD errors when updating priorities. 86 | "prioritized_replay_eps": 1e-6, 87 | # Whether to LZ4 compress observations 88 | "compress_observations": True, 89 | 90 | # === Optimization === 91 | # Learning rate for adam optimizer 92 | "lr": 5e-4, 93 | # Learning rate schedule 94 | "lr_schedule": None, 95 | # Adam epsilon hyper parameter 96 | "adam_epsilon": 1e-8, 97 | # If not None, clip gradients during optimization at this value 98 | "grad_norm_clipping": 40, 99 | # How many steps of the model to sample before learning starts. 100 | "learning_starts": 1000, 101 | # Update the replay buffer with this many samples at once. Note that 102 | # this setting applies per-worker if num_workers > 1. 103 | "sample_batch_size": 4, 104 | # Size of a batched sampled from replay buffer for training. Note that 105 | # if async_updates is set, then each worker returns gradients for a 106 | # batch of this size. 107 | "train_batch_size": 32, 108 | 109 | # === Parallelism === 110 | # Number of workers for collecting samples with. This only makes sense 111 | # to increase if your environment is particularly slow to sample, or if 112 | # you"re using the Async or Ape-X optimizers. 113 | "num_workers": 0, 114 | # Whether to use a distribution of epsilons across workers for exploration. 115 | "per_worker_exploration": False, 116 | # Whether to compute priorities on workers. 
117 | "worker_side_prioritization": False, 118 | # Prevent iterations from going lower than this time span 119 | "min_iter_time_s": 1, 120 | }) 121 | # __sphinx_doc_end__ 122 | # yapf: enable 123 | 124 | 125 | def make_optimizer(workers, config): 126 | return SyncReplayOptimizer( 127 | workers, 128 | learning_starts=config["learning_starts"], 129 | buffer_size=config["buffer_size"], 130 | prioritized_replay=config["prioritized_replay"], 131 | prioritized_replay_alpha=config["prioritized_replay_alpha"], 132 | prioritized_replay_beta=config["prioritized_replay_beta"], 133 | schedule_max_timesteps=config["schedule_max_timesteps"], 134 | beta_annealing_fraction=config["beta_annealing_fraction"], 135 | final_prioritized_replay_beta=config["final_prioritized_replay_beta"], 136 | prioritized_replay_eps=config["prioritized_replay_eps"], 137 | train_batch_size=config["train_batch_size"], 138 | sample_batch_size=config["sample_batch_size"], 139 | **config["optimizer"]) 140 | 141 | 142 | def check_config_and_setup_param_noise(config): 143 | """Update the config based on settings. 144 | 145 | Rewrites sample_batch_size to take into account n_step truncation, and also 146 | adds the necessary callbacks to support parameter space noise exploration. 147 | """ 148 | 149 | # Update effective batch size to include n-step 150 | adjusted_batch_size = max(config["sample_batch_size"], 151 | config.get("n_step", 1)) 152 | config["sample_batch_size"] = adjusted_batch_size 153 | 154 | if config.get("parameter_noise", False): 155 | if config["batch_mode"] != "complete_episodes": 156 | raise ValueError("Exploration with parameter space noise requires " 157 | "batch_mode to be complete_episodes.") 158 | if config.get("noisy", False): 159 | raise ValueError( 160 | "Exploration with parameter space noise and noisy network " 161 | "cannot be used at the same time.") 162 | if config["callbacks"]["on_episode_start"]: 163 | start_callback = config["callbacks"]["on_episode_start"] 164 | else: 165 | start_callback = None 166 | 167 | def on_episode_start(info): 168 | # as a callback function to sample and pose parameter space 169 | # noise on the parameters of network 170 | policies = info["policy"] 171 | for pi in policies.values(): 172 | pi.add_parameter_noise() 173 | if start_callback: 174 | start_callback(info) 175 | 176 | config["callbacks"]["on_episode_start"] = on_episode_start 177 | if config["callbacks"]["on_episode_end"]: 178 | end_callback = config["callbacks"]["on_episode_end"] 179 | else: 180 | end_callback = None 181 | 182 | def on_episode_end(info): 183 | # as a callback function to monitor the distance 184 | # between noisy policy and original policy 185 | policies = info["policy"] 186 | episode = info["episode"] 187 | model = policies[DEFAULT_POLICY_ID].model 188 | if hasattr(model, "pi_distance"): 189 | episode.custom_metrics["policy_distance"] = model.pi_distance 190 | if end_callback: 191 | end_callback(info) 192 | 193 | config["callbacks"]["on_episode_end"] = on_episode_end 194 | 195 | 196 | def get_initial_state(config): 197 | return { 198 | "last_target_update_ts": 0, 199 | "num_target_updates": 0, 200 | } 201 | 202 | 203 | def make_exploration_schedule(config, worker_index): 204 | # Use either a different `eps` per worker, or a linear schedule. 
205 | if config["per_worker_exploration"]: 206 | assert config["num_workers"] > 1, \ 207 | "This requires multiple workers" 208 | if worker_index >= 0: 209 | # Exploration constants from the Ape-X paper 210 | exponent = ( 211 | 1 + worker_index / float(config["num_workers"] - 1) * 7) 212 | return ConstantSchedule(0.4**exponent) 213 | else: 214 | # local ev should have zero exploration so that eval rollouts 215 | # run properly 216 | return ConstantSchedule(0.0) 217 | return LinearSchedule( 218 | schedule_timesteps=int( 219 | config["exploration_fraction"] * config["schedule_max_timesteps"]), 220 | initial_p=1.0, 221 | final_p=config["exploration_final_eps"]) 222 | 223 | 224 | def setup_exploration(trainer): 225 | trainer.exploration0 = make_exploration_schedule(trainer.config, -1) 226 | trainer.explorations = [ 227 | make_exploration_schedule(trainer.config, i) 228 | for i in range(trainer.config["num_workers"]) 229 | ] 230 | 231 | 232 | def update_worker_explorations(trainer): 233 | global_timestep = trainer.optimizer.num_steps_sampled 234 | exp_vals = [trainer.exploration0.value(global_timestep)] 235 | trainer.workers.local_worker().foreach_trainable_policy( 236 | lambda p, _: p.set_epsilon(exp_vals[0])) 237 | for i, e in enumerate(trainer.workers.remote_workers()): 238 | exp_val = trainer.explorations[i].value(global_timestep) 239 | e.foreach_trainable_policy.remote(lambda p, _: p.set_epsilon(exp_val)) 240 | exp_vals.append(exp_val) 241 | trainer.train_start_timestep = global_timestep 242 | trainer.cur_exp_vals = exp_vals 243 | 244 | 245 | def add_trainer_metrics(trainer, result): 246 | global_timestep = trainer.optimizer.num_steps_sampled 247 | result.update( 248 | timesteps_this_iter=global_timestep - trainer.train_start_timestep, 249 | info=dict({ 250 | "min_exploration": min(trainer.cur_exp_vals), 251 | "max_exploration": max(trainer.cur_exp_vals), 252 | "num_target_updates": trainer.state["num_target_updates"], 253 | }, **trainer.optimizer.stats())) 254 | 255 | 256 | def update_target_if_needed(trainer, fetches): 257 | global_timestep = trainer.optimizer.num_steps_sampled 258 | if global_timestep - trainer.state["last_target_update_ts"] > \ 259 | trainer.config["target_network_update_freq"]: 260 | trainer.workers.local_worker().foreach_trainable_policy( 261 | lambda p, _: p.update_target()) 262 | trainer.state["last_target_update_ts"] = global_timestep 263 | trainer.state["num_target_updates"] += 1 264 | 265 | 266 | def collect_metrics(trainer): 267 | if trainer.config["per_worker_exploration"]: 268 | # Only collect metrics from the third of workers with lowest eps 269 | result = trainer.collect_metrics( 270 | selected_workers=trainer.workers.remote_workers()[ 271 | -len(trainer.workers.remote_workers()) // 1:]) 272 | else: 273 | result = trainer.collect_metrics() 274 | return result 275 | 276 | 277 | def disable_exploration(trainer): 278 | trainer.evaluation_workers.local_worker().foreach_policy( 279 | lambda p, _: p.set_epsilon(0)) 280 | 281 | 282 | GenericOffPolicyTrainer = build_trainer( 283 | name="GenericOffPolicyAlgorithm", 284 | default_policy=None, 285 | default_config=DEFAULT_CONFIG, 286 | validate_config=check_config_and_setup_param_noise, 287 | get_initial_state=get_initial_state, 288 | make_policy_optimizer=make_optimizer, 289 | before_init=setup_exploration, 290 | before_train_step=update_worker_explorations, 291 | after_optimizer_step=update_target_if_needed, 292 | after_train_result=add_trainer_metrics, 293 | collect_metrics_fn=collect_metrics, 294 | 
before_evaluate_fn=disable_exploration) 295 | 296 | DQNTrainer = GenericOffPolicyTrainer.with_updates( 297 | name="DQN", default_policy=DQNTFPolicy, default_config=DEFAULT_CONFIG) 298 | 299 | SimpleQTrainer = DQNTrainer.with_updates(default_policy=SimpleQPolicy) 300 | -------------------------------------------------------------------------------- /copy_to_rllib/evaluation/metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import logging 6 | import numpy as np 7 | import collections 8 | 9 | import ray 10 | from ray.rllib.evaluation.rollout_metrics import RolloutMetrics 11 | from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID 12 | from ray.rllib.offline.off_policy_estimator import OffPolicyEstimate 13 | from ray.rllib.policy.policy import LEARNER_STATS_KEY 14 | from ray.rllib.utils.annotations import DeveloperAPI 15 | from ray.rllib.utils.memory import ray_get_and_free 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | @DeveloperAPI 21 | def get_learner_stats(grad_info): 22 | """Return optimization stats reported from the policy. 23 | 24 | Example: 25 | >>> grad_info = evaluator.learn_on_batch(samples) 26 | >>> print(get_stats(grad_info)) 27 | {"vf_loss": ..., "policy_loss": ...} 28 | """ 29 | 30 | if LEARNER_STATS_KEY in grad_info: 31 | return grad_info[LEARNER_STATS_KEY] 32 | 33 | multiagent_stats = {} 34 | for k, v in grad_info.items(): 35 | if type(v) is dict: 36 | if LEARNER_STATS_KEY in v: 37 | multiagent_stats[k] = v[LEARNER_STATS_KEY] 38 | 39 | return multiagent_stats 40 | 41 | 42 | @DeveloperAPI 43 | def collect_metrics(local_worker=None, 44 | remote_workers=[], 45 | to_be_collected=[], 46 | timeout_seconds=180): 47 | """Gathers episode metrics from RolloutWorker instances.""" 48 | 49 | episodes, to_be_collected = collect_episodes( 50 | local_worker, 51 | remote_workers, 52 | to_be_collected, 53 | timeout_seconds=timeout_seconds) 54 | metrics = summarize_episodes(episodes, episodes) 55 | return metrics 56 | 57 | 58 | @DeveloperAPI 59 | def collect_episodes(local_worker=None, 60 | remote_workers=[], 61 | to_be_collected=[], 62 | timeout_seconds=180): 63 | """Gathers new episodes metrics tuples from the given evaluators.""" 64 | 65 | if remote_workers: 66 | pending = [ 67 | a.apply.remote(lambda ev: ev.get_metrics()) for a in remote_workers 68 | ] + to_be_collected 69 | collected, to_be_collected = ray.wait( 70 | pending, num_returns=len(pending), timeout=timeout_seconds * 1.0) 71 | if pending and len(collected) == 0: 72 | logger.warning( 73 | "WARNING: collected no metrics in {} seconds".format( 74 | timeout_seconds)) 75 | metric_lists = ray_get_and_free(collected) 76 | else: 77 | metric_lists = [] 78 | 79 | if local_worker: 80 | metric_lists.append(local_worker.get_metrics()) 81 | episodes = [] 82 | for metrics in metric_lists: 83 | episodes.extend(metrics) 84 | return episodes, to_be_collected 85 | 86 | 87 | @DeveloperAPI 88 | def summarize_episodes(episodes, new_episodes): 89 | """Summarizes a set of episode metrics tuples. 
90 | 91 | Arguments: 92 | episodes: smoothed set of episodes including historical ones 93 | new_episodes: just the new episodes in this iteration 94 | """ 95 | 96 | episodes, estimates = _partition(episodes) 97 | new_episodes, _ = _partition(new_episodes) 98 | 99 | episode_rewards = [] 100 | episode_lengths = [] 101 | policy_rewards = collections.defaultdict(list) 102 | custom_metrics = collections.defaultdict(list) 103 | perf_stats = collections.defaultdict(list) 104 | for episode in episodes: 105 | episode_lengths.append(episode.episode_length) 106 | episode_rewards.append(episode.episode_reward) 107 | # for k, v in episode.custom_metrics.items(): 108 | # custom_metrics[k].append(v) 109 | for k, v in episode.perf_stats.items(): 110 | perf_stats[k].append(v) 111 | for (_, policy_id), reward in episode.agent_rewards.items(): 112 | if policy_id != DEFAULT_POLICY_ID: 113 | policy_rewards[policy_id].append(reward) 114 | # only calculate the custom_metrics on the episodes in the current iteration 115 | # to have an accurate successful rate 116 | for episode in new_episodes: 117 | for k, v in episode.custom_metrics.items(): 118 | custom_metrics[k].append(v) 119 | if episode_rewards: 120 | min_reward = min(episode_rewards) 121 | max_reward = max(episode_rewards) 122 | else: 123 | min_reward = float("nan") 124 | max_reward = float("nan") 125 | avg_reward = np.mean(episode_rewards) 126 | avg_length = np.mean(episode_lengths) 127 | 128 | policy_reward_min = {} 129 | policy_reward_mean = {} 130 | policy_reward_max = {} 131 | for policy_id, rewards in policy_rewards.copy().items(): 132 | policy_reward_min[policy_id] = np.min(rewards) 133 | policy_reward_mean[policy_id] = np.mean(rewards) 134 | policy_reward_max[policy_id] = np.max(rewards) 135 | 136 | for k, v_list in custom_metrics.copy().items(): 137 | custom_metrics[k + "_mean"] = np.mean(v_list) 138 | filt = [v for v in v_list if not np.isnan(v)] 139 | if filt: 140 | custom_metrics[k + "_min"] = np.min(filt) 141 | custom_metrics[k + "_max"] = np.max(filt) 142 | else: 143 | custom_metrics[k + "_min"] = float("nan") 144 | custom_metrics[k + "_max"] = float("nan") 145 | del custom_metrics[k] 146 | 147 | for k, v_list in perf_stats.copy().items(): 148 | perf_stats[k] = np.mean(v_list) 149 | 150 | estimators = collections.defaultdict(lambda: collections.defaultdict(list)) 151 | for e in estimates: 152 | acc = estimators[e.estimator_name] 153 | for k, v in e.metrics.items(): 154 | acc[k].append(v) 155 | for name, metrics in estimators.items(): 156 | for k, v_list in metrics.items(): 157 | metrics[k] = np.mean(v_list) 158 | estimators[name] = dict(metrics) 159 | 160 | return dict( 161 | episode_reward_max=max_reward, 162 | episode_reward_min=min_reward, 163 | episode_reward_mean=avg_reward, 164 | episode_len_mean=avg_length, 165 | episodes_this_iter=len(new_episodes), 166 | policy_reward_min=policy_reward_min, 167 | policy_reward_max=policy_reward_max, 168 | policy_reward_mean=policy_reward_mean, 169 | custom_metrics=dict(custom_metrics), 170 | sampler_perf=dict(perf_stats), 171 | off_policy_estimator=dict(estimators)) 172 | 173 | 174 | def _partition(episodes): 175 | """Divides metrics data into true rollouts vs off-policy estimates.""" 176 | 177 | rollouts, estimates = [], [] 178 | for e in episodes: 179 | if isinstance(e, RolloutMetrics): 180 | rollouts.append(e) 181 | elif isinstance(e, OffPolicyEstimate): 182 | estimates.append(e) 183 | else: 184 | raise ValueError("Unknown metric type: {}".format(e)) 185 | return rollouts, estimates 186 | 
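To make the behaviour of the patched `summarize_episodes` above concrete: custom metrics are aggregated only over the episodes of the current iteration, and each key `k` is replaced by `k_mean`, `k_min`, and `k_max`, where the mean is taken over all values (so a NaN propagates) while min/max skip NaNs. A small self-contained illustration; the metric name `success` is just an assumed example of a custom metric an environment might report:

```python
# Mirror of the custom-metrics aggregation in the patched summarize_episodes().
import numpy as np

# Per-episode custom metrics reported in the current iteration (assumed example).
new_episode_metrics = [{"success": 1.0}, {"success": 0.0}, {"success": float("nan")}]

values = [m["success"] for m in new_episode_metrics]
summary = {"success_mean": np.mean(values)}  # NaN-propagating mean, as in the patch
filtered = [v for v in values if not np.isnan(v)]
summary["success_min"] = np.min(filtered) if filtered else float("nan")
summary["success_max"] = np.max(filtered) if filtered else float("nan")

print(summary)  # e.g. {'success_mean': nan, 'success_min': 0.0, 'success_max': 1.0}
```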
-------------------------------------------------------------------------------- /copy_to_rllib/optimizers/async_replay_optimizer.py: -------------------------------------------------------------------------------- 1 | """Implements Distributed Prioritized Experience Replay. 2 | 3 | https://arxiv.org/abs/1803.00933""" 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import collections 10 | import os 11 | import random 12 | import time 13 | import threading 14 | 15 | import numpy as np 16 | from six.moves import queue 17 | 18 | import ray 19 | from ray.rllib.evaluation.metrics import get_learner_stats 20 | from ray.rllib.policy.sample_batch import SampleBatch, DEFAULT_POLICY_ID, \ 21 | MultiAgentBatch 22 | from ray.rllib.optimizers.policy_optimizer import PolicyOptimizer 23 | from ray.rllib.optimizers.replay_buffer import PrioritizedReplayBuffer 24 | from ray.rllib.utils.annotations import override 25 | from ray.rllib.utils.actors import TaskPool, create_colocated 26 | from ray.rllib.utils.memory import ray_get_and_free 27 | from ray.rllib.utils.timer import TimerStat 28 | from ray.rllib.utils.window_stat import WindowStat 29 | 30 | SAMPLE_QUEUE_DEPTH = 2 31 | REPLAY_QUEUE_DEPTH = 4 32 | LEARNER_QUEUE_MAX_SIZE = 16 33 | 34 | 35 | # added for dynamic experience replay 36 | import os, os.path, inspect 37 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 38 | parentdir = os.path.dirname(currentdir) 39 | import pickle 40 | from collections import deque 41 | 42 | # print texts in green in terminal 43 | def prGreen(skk): print("\033[92m {}\033[00m" .format(skk)) 44 | 45 | # filter out invisible files and sub-folders 46 | def file_filter(names): 47 | filtered_files = [] 48 | for name in names: 49 | if name.startswith(".") is False: 50 | filtered_files.append(name) 51 | return filtered_files 52 | # added for dynamic experience replay 53 | 54 | 55 | class AsyncReplayOptimizer(PolicyOptimizer): 56 | """Main event loop of the Ape-X optimizer (async sampling with replay). 57 | 58 | This class coordinates the data transfers between the learner thread, 59 | remote workers (Ape-X actors), and replay buffer actors. 60 | 61 | This has two modes of operation: 62 | - normal replay: replays independent samples. 63 | - batch replay: simplified mode where entire sample batches are 64 | replayed. This supports RNNs, but not prioritization. 65 | 66 | This optimizer requires that rollout workers return an additional 67 | "td_error" array in the info return of compute_gradients(). This error 68 | term will be used for sample prioritization.""" 69 | 70 | def __init__(self, 71 | workers, 72 | learning_starts=1000, 73 | buffer_size=10000, 74 | prioritized_replay=True, 75 | prioritized_replay_alpha=0.6, 76 | prioritized_replay_beta=0.4, 77 | prioritized_replay_eps=1e-6, 78 | train_batch_size=512, 79 | sample_batch_size=50, 80 | num_replay_buffer_shards=1, 81 | max_weight_sync_delay=400, 82 | debug=False, 83 | batch_replay=False, 84 | # added for dynamic experience replay 85 | human_demonstration=False, 86 | multiple_human_data=-1, 87 | human_data_dir=None, 88 | dynamic_experience_replay=False, 89 | demonstration_zone_percentage=0, 90 | robot_demo_path=None): 91 | """Initialize an async replay optimizer. 92 | 93 | Arguments: 94 | workers (WorkerSet): all workers 95 | learning_starts (int): wait until this many steps have been sampled 96 | before starting optimization. 
97 | buffer_size (int): max size of the replay buffer 98 | prioritized_replay (bool): whether to enable prioritized replay 99 | prioritized_replay_alpha (float): replay alpha hyperparameter 100 | prioritized_replay_beta (float): replay beta hyperparameter 101 | prioritized_replay_eps (float): replay eps hyperparameter 102 | train_batch_size (int): size of batches to learn on 103 | sample_batch_size (int): size of batches to sample from workers 104 | num_replay_buffer_shards (int): number of actors to use to store 105 | replay samples 106 | max_weight_sync_delay (int): update the weights of a rollout worker 107 | after collecting this number of timesteps from it 108 | debug (bool): return extra debug stats 109 | batch_replay (bool): replay entire sequential batches of 110 | experiences instead of sampling steps individually 111 | """ 112 | PolicyOptimizer.__init__(self, workers) 113 | 114 | self.debug = debug 115 | self.batch_replay = batch_replay 116 | self.replay_starts = learning_starts 117 | self.prioritized_replay_beta = prioritized_replay_beta 118 | self.prioritized_replay_eps = prioritized_replay_eps 119 | self.max_weight_sync_delay = max_weight_sync_delay 120 | 121 | self.learner = LearnerThread(self.workers.local_worker()) 122 | self.learner.start() 123 | 124 | # added for dynamic experience replay 125 | self.dynamic_experience_replay = dynamic_experience_replay 126 | 127 | if self.batch_replay: 128 | replay_cls = BatchReplayActor 129 | else: 130 | replay_cls = ReplayActor 131 | self.replay_actors = create_colocated(replay_cls, [ 132 | num_replay_buffer_shards, 133 | learning_starts, 134 | buffer_size, 135 | train_batch_size, 136 | prioritized_replay_alpha, 137 | prioritized_replay_beta, 138 | prioritized_replay_eps, 139 | # added for dynamic experience replay 140 | human_demonstration, 141 | multiple_human_data, 142 | human_data_dir, 143 | dynamic_experience_replay, 144 | demonstration_zone_percentage, 145 | robot_demo_path 146 | ], num_replay_buffer_shards) 147 | 148 | # Stats 149 | self.timers = { 150 | k: TimerStat() 151 | for k in [ 152 | "put_weights", "get_samples", "sample_processing", 153 | "replay_processing", "update_priorities", "train", "sample" 154 | ] 155 | } 156 | self.num_weight_syncs = 0 157 | self.num_samples_dropped = 0 158 | self.learning_started = False 159 | 160 | # Number of worker steps since the last weight update 161 | self.steps_since_update = {} 162 | 163 | # Otherwise kick of replay tasks for local gradient updates 164 | self.replay_tasks = TaskPool() 165 | for ra in self.replay_actors: 166 | for _ in range(REPLAY_QUEUE_DEPTH): 167 | self.replay_tasks.add(ra, ra.replay.remote()) 168 | 169 | # Kick off async background sampling 170 | self.sample_tasks = TaskPool() 171 | if self.workers.remote_workers(): 172 | self._set_workers(self.workers.remote_workers()) 173 | 174 | @override(PolicyOptimizer) 175 | def step(self): 176 | assert self.learner.is_alive() 177 | assert len(self.workers.remote_workers()) > 0 178 | start = time.time() 179 | sample_timesteps, train_timesteps = self._step() 180 | time_delta = time.time() - start 181 | self.timers["sample"].push(time_delta) 182 | self.timers["sample"].push_units_processed(sample_timesteps) 183 | if train_timesteps > 0: 184 | self.learning_started = True 185 | if self.learning_started: 186 | self.timers["train"].push(time_delta) 187 | self.timers["train"].push_units_processed(train_timesteps) 188 | self.num_steps_sampled += sample_timesteps 189 | self.num_steps_trained += train_timesteps 190 | 191 | 
@override(PolicyOptimizer) 192 | def stop(self): 193 | for r in self.replay_actors: 194 | r.__ray_terminate__.remote() 195 | self.learner.stopped = True 196 | 197 | @override(PolicyOptimizer) 198 | def reset(self, remote_workers): 199 | self.workers.reset(remote_workers) 200 | self.sample_tasks.reset_workers(remote_workers) 201 | 202 | @override(PolicyOptimizer) 203 | def stats(self): 204 | replay_stats = ray_get_and_free(self.replay_actors[0].stats.remote( 205 | self.debug)) 206 | timing = { 207 | "{}_time_ms".format(k): round(1000 * self.timers[k].mean, 3) 208 | for k in self.timers 209 | } 210 | timing["learner_grad_time_ms"] = round( 211 | 1000 * self.learner.grad_timer.mean, 3) 212 | timing["learner_dequeue_time_ms"] = round( 213 | 1000 * self.learner.queue_timer.mean, 3) 214 | stats = { 215 | "sample_throughput": round(self.timers["sample"].mean_throughput, 216 | 3), 217 | "train_throughput": round(self.timers["train"].mean_throughput, 3), 218 | "num_weight_syncs": self.num_weight_syncs, 219 | "num_samples_dropped": self.num_samples_dropped, 220 | "learner_queue": self.learner.learner_queue_size.stats(), 221 | "replay_shard_0": replay_stats, 222 | } 223 | debug_stats = { 224 | "timing_breakdown": timing, 225 | "pending_sample_tasks": self.sample_tasks.count, 226 | "pending_replay_tasks": self.replay_tasks.count, 227 | } 228 | if self.debug: 229 | stats.update(debug_stats) 230 | if self.learner.stats: 231 | stats["learner"] = self.learner.stats 232 | return dict(PolicyOptimizer.stats(self), **stats) 233 | 234 | # For https://github.com/ray-project/ray/issues/2541 only 235 | def _set_workers(self, remote_workers): 236 | self.workers.reset(remote_workers) 237 | weights = self.workers.local_worker().get_weights() 238 | for ev in self.workers.remote_workers(): 239 | ev.set_weights.remote(weights) 240 | self.steps_since_update[ev] = 0 241 | for _ in range(SAMPLE_QUEUE_DEPTH): 242 | self.sample_tasks.add(ev, ev.sample_with_count.remote()) 243 | 244 | def _step(self): 245 | sample_timesteps, train_timesteps = 0, 0 246 | weights = None 247 | 248 | with self.timers["sample_processing"]: 249 | completed = list(self.sample_tasks.completed()) 250 | counts = ray_get_and_free([c[1][1] for c in completed]) 251 | for i, (ev, (sample_batch, count)) in enumerate(completed): 252 | sample_timesteps += counts[i] 253 | 254 | # Send the data to the replay buffer 255 | random.choice( 256 | self.replay_actors).add_batch.remote(sample_batch) 257 | 258 | # Update weights if needed 259 | self.steps_since_update[ev] += counts[i] 260 | if self.steps_since_update[ev] >= self.max_weight_sync_delay: 261 | # Note that it's important to pull new weights once 262 | # updated to avoid excessive correlation between actors 263 | if weights is None or self.learner.weights_updated: 264 | self.learner.weights_updated = False 265 | with self.timers["put_weights"]: 266 | weights = ray.put( 267 | self.workers.local_worker().get_weights()) 268 | ev.set_weights.remote(weights) 269 | self.num_weight_syncs += 1 270 | self.steps_since_update[ev] = 0 271 | 272 | # Kick off another sample request 273 | self.sample_tasks.add(ev, ev.sample_with_count.remote()) 274 | 275 | # added for dynamic experience replay 276 | if self.dynamic_experience_replay and random.random() < 0.0001: 277 | random.choice(self.replay_actors).update_demos.remote() 278 | 279 | with self.timers["replay_processing"]: 280 | for ra, replay in self.replay_tasks.completed(): 281 | self.replay_tasks.add(ra, ra.replay.remote()) 282 | if self.learner.inqueue.full(): 283 | 
self.num_samples_dropped += 1 284 | else: 285 | with self.timers["get_samples"]: 286 | samples = ray_get_and_free(replay) 287 | # Defensive copy against plasma crashes, see #2610 #3452 288 | self.learner.inqueue.put((ra, samples and samples.copy())) 289 | 290 | with self.timers["update_priorities"]: 291 | while not self.learner.outqueue.empty(): 292 | ra, prio_dict, count = self.learner.outqueue.get() 293 | ra.update_priorities.remote(prio_dict) 294 | train_timesteps += count 295 | 296 | return sample_timesteps, train_timesteps 297 | 298 | 299 | @ray.remote(num_cpus=0) 300 | class ReplayActor(object): 301 | """A replay buffer shard. 302 | 303 | Ray actors are single-threaded, so for scalability multiple replay actors 304 | may be created to increase parallelism.""" 305 | 306 | def __init__(self, num_shards, learning_starts, buffer_size, 307 | train_batch_size, prioritized_replay_alpha, 308 | prioritized_replay_beta, prioritized_replay_eps, 309 | # added for dynamic experience replay 310 | human_demonstration, multiple_human_data, human_data_dir, 311 | dynamic_experience_replay, demonstration_zone_percentage, robot_demo_path): 312 | self.replay_starts = learning_starts // num_shards 313 | self.buffer_size = buffer_size // num_shards 314 | self.train_batch_size = train_batch_size 315 | self.prioritized_replay_beta = prioritized_replay_beta 316 | self.prioritized_replay_eps = prioritized_replay_eps 317 | 318 | # Metrics 319 | self.add_batch_timer = TimerStat() 320 | self.replay_timer = TimerStat() 321 | self.update_priorities_timer = TimerStat() 322 | self.num_added = 0 323 | 324 | 325 | # added for dynamic experience replay 326 | self.prioritized_replay_alpha = prioritized_replay_alpha 327 | self.human_demonstration = human_demonstration 328 | self.multiple_human_data = multiple_human_data 329 | self.human_data_dir = human_data_dir 330 | self.dynamic_experience_replay = dynamic_experience_replay 331 | self.demonstration_zone_percentage = demonstration_zone_percentage 332 | self.robot_demo_path = robot_demo_path 333 | self.load_human_demo() 334 | 335 | 336 | # added for dynamic experience replay 337 | def load_human_demo(self): 338 | if self.human_demonstration: 339 | assert self.human_data_dir is not None, "human demonstration path is None" 340 | assert (self.multiple_human_data == -1 or 341 | self.multiple_human_data == 0 or 342 | self.multiple_human_data == 1), "multiple_human_data has to be either -1, 0, or 1" 343 | 344 | # one shot in all buffers 345 | if self.multiple_human_data == -1: 346 | in_file = open(self.human_data_dir, 'rb') 347 | human_data = pickle.load(in_file) 348 | in_file.close() 349 | prGreen("Human demo data loaded from: {}".format(self.human_data_dir)) 350 | 351 | # all the shots in one buffer 352 | elif self.multiple_human_data == 0: 353 | names = os.listdir(self.human_data_dir) 354 | filtered_files = file_filter(names) 355 | prGreen("Insert {} demonstrations to the buffer".format(len(filtered_files))) 356 | 357 | human_data = deque() 358 | human_demo_dir = None 359 | for i in range(len(filtered_files)): 360 | human_demo_dir = self.human_data_dir + str(i) 361 | in_file = open(human_demo_dir, 'rb') 362 | human_data.extend(pickle.load(in_file)) 363 | in_file.close() 364 | 365 | # each shot occupies one buffer 366 | elif self.multiple_human_data == 1: 367 | names = os.listdir(self.human_data_dir) 368 | human_data_dir = self.human_data_dir 369 | filtered_files = file_filter(names) 370 | prGreen("Sampling from {} demonstrations".format(len(filtered_files))) 371 | 
selected_transition = random.randint(0,len(filtered_files)-1) 372 | if selected_transition is not None: 373 | human_data_dir += str(selected_transition) 374 | in_file = open(human_data_dir, 'rb') 375 | human_data = pickle.load(in_file) 376 | in_file.close() 377 | prGreen("Human demo data loaded from: {}".format(human_data_dir)) 378 | 379 | def new_buffer(): 380 | return PrioritizedReplayBuffer( 381 | self.buffer_size, alpha=self.prioritized_replay_alpha, 382 | permanent_data_length=len(human_data), human_data=human_data, 383 | dynamic_experience_replay=self.dynamic_experience_replay, 384 | demonstration_zone_percentage=self.demonstration_zone_percentage) 385 | self.replay_buffers = collections.defaultdict(new_buffer) 386 | prGreen("{} human transitions are loaded to the replay buffer.".format(len(human_data))) 387 | 388 | else: 389 | def new_buffer(): 390 | return PrioritizedReplayBuffer( 391 | self.buffer_size, alpha=self.prioritized_replay_alpha, 392 | dynamic_experience_replay=self.dynamic_experience_replay, 393 | demonstration_zone_percentage=self.demonstration_zone_percentage) 394 | self.replay_buffers = collections.defaultdict(new_buffer) 395 | 396 | 397 | # added for dynamic experience replay 398 | def update_demos(self): 399 | # randomly selecting a demo 400 | robot_data_dir = self.robot_demo_path 401 | names = os.listdir(robot_data_dir) 402 | filtered_files = file_filter(names) 403 | 404 | if len(filtered_files) > 0: 405 | selected_transition = filtered_files[random.randint(0,len(filtered_files)-1)] 406 | robot_data_dir += str(selected_transition) 407 | in_file = open(robot_data_dir, 'rb') 408 | demo_data = pickle.load(in_file) 409 | in_file.close() 410 | 411 | for policy_id, replay_buffer in self.replay_buffers.items(): 412 | replay_buffer.update_demos(demo_data) 413 | prGreen("Newly sampled demo length: {}".format(len(demo_data))) 414 | prGreen("The current demonstration size: {}".format(replay_buffer._permanent_data_length)) 415 | 416 | 417 | def get_host(self): 418 | return os.uname()[1] 419 | 420 | def add_batch(self, batch): 421 | # Handle everything as if multiagent 422 | if isinstance(batch, SampleBatch): 423 | batch = MultiAgentBatch({DEFAULT_POLICY_ID: batch}, batch.count) 424 | with self.add_batch_timer: 425 | for policy_id, s in batch.policy_batches.items(): 426 | for row in s.rows(): 427 | self.replay_buffers[policy_id].add( 428 | row["obs"], row["actions"], row["rewards"], 429 | row["new_obs"], row["dones"], row["weights"]) 430 | self.num_added += batch.count 431 | 432 | def replay(self): 433 | if self.num_added < self.replay_starts: 434 | return None 435 | 436 | with self.replay_timer: 437 | samples = {} 438 | for policy_id, replay_buffer in self.replay_buffers.items(): 439 | (obses_t, actions, rewards, obses_tp1, dones, weights, 440 | batch_indexes) = replay_buffer.sample( 441 | self.train_batch_size, beta=self.prioritized_replay_beta) 442 | samples[policy_id] = SampleBatch({ 443 | "obs": obses_t, 444 | "actions": actions, 445 | "rewards": rewards, 446 | "new_obs": obses_tp1, 447 | "dones": dones, 448 | "weights": weights, 449 | "batch_indexes": batch_indexes 450 | }) 451 | return MultiAgentBatch(samples, self.train_batch_size) 452 | 453 | def update_priorities(self, prio_dict): 454 | with self.update_priorities_timer: 455 | for policy_id, (batch_indexes, td_errors) in prio_dict.items(): 456 | new_priorities = ( 457 | np.abs(td_errors) + self.prioritized_replay_eps) 458 | self.replay_buffers[policy_id].update_priorities( 459 | batch_indexes, new_priorities) 460 | 
461 | def stats(self, debug=False): 462 | stat = { 463 | "add_batch_time_ms": round(1000 * self.add_batch_timer.mean, 3), 464 | "replay_time_ms": round(1000 * self.replay_timer.mean, 3), 465 | "update_priorities_time_ms": round( 466 | 1000 * self.update_priorities_timer.mean, 3), 467 | } 468 | for policy_id, replay_buffer in self.replay_buffers.items(): 469 | stat.update({ 470 | "policy_{}".format(policy_id): replay_buffer.stats(debug=debug) 471 | }) 472 | return stat 473 | 474 | 475 | # note: we set num_cpus=0 to avoid failing to create replay actors when 476 | # resources are fragmented. This isn't ideal. 477 | @ray.remote(num_cpus=0) 478 | class BatchReplayActor(object): 479 | """The batch replay version of the replay actor. 480 | 481 | This allows for RNN models, but ignores prioritization params. 482 | """ 483 | 484 | def __init__(self, num_shards, learning_starts, buffer_size, 485 | train_batch_size, prioritized_replay_alpha, 486 | prioritized_replay_beta, prioritized_replay_eps): 487 | self.replay_starts = learning_starts // num_shards 488 | self.buffer_size = buffer_size // num_shards 489 | self.train_batch_size = train_batch_size 490 | self.buffer = [] 491 | 492 | # Metrics 493 | self.num_added = 0 494 | self.cur_size = 0 495 | 496 | def get_host(self): 497 | return os.uname()[1] 498 | 499 | def add_batch(self, batch): 500 | # Handle everything as if multiagent 501 | if isinstance(batch, SampleBatch): 502 | batch = MultiAgentBatch({DEFAULT_POLICY_ID: batch}, batch.count) 503 | self.buffer.append(batch) 504 | self.cur_size += batch.count 505 | self.num_added += batch.count 506 | while self.cur_size > self.buffer_size: 507 | self.cur_size -= self.buffer.pop(0).count 508 | 509 | def replay(self): 510 | if self.num_added < self.replay_starts: 511 | return None 512 | return random.choice(self.buffer) 513 | 514 | def update_priorities(self, prio_dict): 515 | pass 516 | 517 | def stats(self, debug=False): 518 | stat = { 519 | "cur_size": self.cur_size, 520 | "num_added": self.num_added, 521 | } 522 | return stat 523 | 524 | 525 | class LearnerThread(threading.Thread): 526 | """Background thread that updates the local model from replay data. 527 | 528 | The learner thread communicates with the main thread through Queues. This 529 | is needed since Ray operations can only be run on the main thread. In 530 | addition, moving heavyweight gradient ops session runs off the main thread 531 | improves overall throughput. 
532 | """ 533 | 534 | def __init__(self, local_worker): 535 | threading.Thread.__init__(self) 536 | self.learner_queue_size = WindowStat("size", 50) 537 | self.local_worker = local_worker 538 | self.inqueue = queue.Queue(maxsize=LEARNER_QUEUE_MAX_SIZE) 539 | self.outqueue = queue.Queue() 540 | self.queue_timer = TimerStat() 541 | self.grad_timer = TimerStat() 542 | self.daemon = True 543 | self.weights_updated = False 544 | self.stopped = False 545 | self.stats = {} 546 | 547 | def run(self): 548 | while not self.stopped: 549 | self.step() 550 | 551 | def step(self): 552 | with self.queue_timer: 553 | ra, replay = self.inqueue.get() 554 | if replay is not None: 555 | prio_dict = {} 556 | with self.grad_timer: 557 | grad_out = self.local_worker.learn_on_batch(replay) 558 | for pid, info in grad_out.items(): 559 | prio_dict[pid] = ( 560 | replay.policy_batches[pid].data.get("batch_indexes"), 561 | info.get("td_error")) 562 | self.stats[pid] = get_learner_stats(info) 563 | self.outqueue.put((ra, prio_dict, replay.count)) 564 | self.learner_queue_size.push(self.inqueue.qsize()) 565 | self.weights_updated = True 566 | -------------------------------------------------------------------------------- /copy_to_rllib/optimizers/replay_buffer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import random 7 | import sys 8 | 9 | from ray.rllib.optimizers.segment_tree import SumSegmentTree, MinSegmentTree 10 | from ray.rllib.utils.annotations import DeveloperAPI 11 | from ray.rllib.utils.compression import unpack_if_needed 12 | from ray.rllib.utils.window_stat import WindowStat 13 | 14 | # added for dynamic experience replay 15 | import math 16 | # print texts in green in terminal 17 | def prGreen(skk): print("\033[92m {}\033[00m" .format(skk)) 18 | 19 | @DeveloperAPI 20 | class ReplayBuffer(object): 21 | @DeveloperAPI 22 | def __init__(self, size, permanent_data_length=0): 23 | """Create Prioritized Replay buffer. 24 | 25 | Parameters 26 | ---------- 27 | size: int 28 | Max number of transitions to store in the buffer. When the buffer 29 | overflows the old memories are dropped. 
30 | """ 31 | self._storage = [] 32 | self._maxsize = size 33 | self._next_idx = 0 34 | self._hit_count = np.zeros(size) 35 | self._eviction_started = False 36 | self._num_added = 0 37 | self._num_sampled = 0 38 | self._evicted_hit_stats = WindowStat("evicted_hit", 1000) 39 | self._est_size_bytes = 0 40 | self._permanent_data_length = permanent_data_length 41 | 42 | def __len__(self): 43 | return len(self._storage) 44 | 45 | @DeveloperAPI 46 | def add(self, obs_t, action, reward, obs_tp1, done, weight): 47 | data = (obs_t, action, reward, obs_tp1, done) 48 | self._num_added += 1 49 | 50 | if self._next_idx >= len(self._storage): 51 | self._storage.append(data) 52 | self._est_size_bytes += sum(sys.getsizeof(d) for d in data) 53 | else: 54 | self._storage[self._next_idx] = data 55 | if self._next_idx + 1 >= self._maxsize: 56 | self._eviction_started = True 57 | # added for dynamic experience replay 58 | self._next_idx = self._permanent_data_length 59 | else: 60 | self._next_idx = self._next_idx + 1 61 | if self._eviction_started: 62 | self._evicted_hit_stats.push(self._hit_count[self._next_idx]) 63 | self._hit_count[self._next_idx] = 0 64 | 65 | def _encode_sample(self, idxes): 66 | obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], [] 67 | for i in idxes: 68 | data = self._storage[i] 69 | obs_t, action, reward, obs_tp1, done = data 70 | obses_t.append(np.array(unpack_if_needed(obs_t), copy=False)) 71 | actions.append(np.array(action, copy=False)) 72 | rewards.append(reward) 73 | obses_tp1.append(np.array(unpack_if_needed(obs_tp1), copy=False)) 74 | dones.append(done) 75 | self._hit_count[i] += 1 76 | return (np.array(obses_t), np.array(actions), np.array(rewards), 77 | np.array(obses_tp1), np.array(dones)) 78 | 79 | @DeveloperAPI 80 | def sample_idxes(self, batch_size): 81 | return [ 82 | random.randint(0, 83 | len(self._storage) - 1) for _ in range(batch_size) 84 | ] 85 | 86 | @DeveloperAPI 87 | def sample_with_idxes(self, idxes): 88 | self._num_sampled += len(idxes) 89 | return self._encode_sample(idxes) 90 | 91 | @DeveloperAPI 92 | def sample(self, batch_size): 93 | """Sample a batch of experiences. 94 | 95 | Parameters 96 | ---------- 97 | batch_size: int 98 | How many transitions to sample. 99 | 100 | Returns 101 | ------- 102 | obs_batch: np.array 103 | batch of observations 104 | act_batch: np.array 105 | batch of actions executed given obs_batch 106 | rew_batch: np.array 107 | rewards received as results of executing act_batch 108 | next_obs_batch: np.array 109 | next set of observations seen after executing act_batch 110 | done_mask: np.array 111 | done_mask[i] = 1 if executing act_batch[i] resulted in 112 | the end of an episode and 0 otherwise. 
113 | """ 114 | idxes = [ 115 | random.randint(0, 116 | len(self._storage) - 1) for _ in range(batch_size) 117 | ] 118 | self._num_sampled += batch_size 119 | return self._encode_sample(idxes) 120 | 121 | @DeveloperAPI 122 | def stats(self, debug=False): 123 | data = { 124 | "added_count": self._num_added, 125 | "sampled_count": self._num_sampled, 126 | "est_size_bytes": self._est_size_bytes, 127 | "num_entries": len(self._storage), 128 | } 129 | if debug: 130 | data.update(self._evicted_hit_stats.stats()) 131 | return data 132 | 133 | 134 | @DeveloperAPI 135 | class PrioritizedReplayBuffer(ReplayBuffer): 136 | @DeveloperAPI 137 | def __init__(self, size, alpha, 138 | # added for dynamic experience replay 139 | permanent_data_length=0, human_data=None, 140 | dynamic_experience_replay=False, 141 | demonstration_zone_percentage=0): 142 | 143 | """Create Prioritized Replay buffer. 144 | 145 | Parameters 146 | ---------- 147 | size: int 148 | Max number of transitions to store in the buffer. When the buffer 149 | overflows the old memories are dropped. 150 | alpha: float 151 | how much prioritization is used 152 | (0 - no prioritization, 1 - full prioritization) 153 | 154 | See Also 155 | -------- 156 | ReplayBuffer.__init__ 157 | """ 158 | 159 | # added for dynamic experience replay 160 | super(PrioritizedReplayBuffer, self).__init__(size, permanent_data_length) 161 | 162 | assert alpha > 0 163 | self._alpha = alpha 164 | 165 | it_capacity = 1 166 | while it_capacity < size: 167 | it_capacity *= 2 168 | 169 | self._it_sum = SumSegmentTree(it_capacity) 170 | self._it_min = MinSegmentTree(it_capacity) 171 | self._max_priority = 1.0 172 | self._prio_change_stats = WindowStat("reprio", 1000) 173 | 174 | # added for dynamic experience replay 175 | self._permanent_data_length = permanent_data_length 176 | self._demo_index = permanent_data_length 177 | if dynamic_experience_replay and demonstration_zone_percentage > 0: 178 | self._max_demo_size = math.ceil(size * demonstration_zone_percentage) # assign the size for demonstration zone 179 | assert self._max_demo_size > 10000, "demonstration zone is too small, please increase buffer size" 180 | prGreen("the maximum demonstration zone size: {}".format(self._max_demo_size)) 181 | if self._permanent_data_length > 0: 182 | # insert human demos to the buffer with full prioritization 183 | for transition in human_data: 184 | self.add(transition[0], transition[1], transition[2], transition[3], transition[4], 1) 185 | 186 | @DeveloperAPI 187 | def add(self, obs_t, action, reward, obs_tp1, done, weight): 188 | """See ReplayBuffer.store_effect""" 189 | 190 | idx = self._next_idx 191 | super(PrioritizedReplayBuffer, self).add(obs_t, action, reward, 192 | obs_tp1, done, weight) 193 | if weight is None: 194 | weight = self._max_priority 195 | self._it_sum[idx] = weight**self._alpha 196 | self._it_min[idx] = weight**self._alpha 197 | 198 | # added for dynamic experience replay 199 | def update_demos(self, demo_data): 200 | 201 | updated_demo_size = self._permanent_data_length + len(demo_data) 202 | if updated_demo_size < self._max_demo_size: 203 | self._permanent_data_length = updated_demo_size 204 | else: 205 | self._permanent_data_length = self._max_demo_size 206 | 207 | for transition in demo_data: 208 | self._storage[self._demo_index] = transition 209 | self._demo_index = (self._demo_index + 1) % self._max_demo_size 210 | 211 | def _sample_proportional(self, batch_size): 212 | res = [] 213 | for _ in range(batch_size): 214 | # TODO(szymon): should we ensure no 
repeats? 215 | mass = random.random() * self._it_sum.sum(0, len(self._storage)) 216 | idx = self._it_sum.find_prefixsum_idx(mass) 217 | res.append(idx) 218 | return res 219 | 220 | @DeveloperAPI 221 | def sample_idxes(self, batch_size): 222 | return self._sample_proportional(batch_size) 223 | 224 | @DeveloperAPI 225 | def sample_with_idxes(self, idxes, beta): 226 | assert beta > 0 227 | self._num_sampled += len(idxes) 228 | 229 | weights = [] 230 | p_min = self._it_min.min() / self._it_sum.sum() 231 | max_weight = (p_min * len(self._storage))**(-beta) 232 | 233 | for idx in idxes: 234 | p_sample = self._it_sum[idx] / self._it_sum.sum() 235 | weight = (p_sample * len(self._storage))**(-beta) 236 | weights.append(weight / max_weight) 237 | weights = np.array(weights) 238 | encoded_sample = self._encode_sample(idxes) 239 | return tuple(list(encoded_sample) + [weights, idxes]) 240 | 241 | @DeveloperAPI 242 | def sample(self, batch_size, beta): 243 | """Sample a batch of experiences. 244 | 245 | compared to ReplayBuffer.sample 246 | it also returns importance weights and idxes 247 | of sampled experiences. 248 | 249 | 250 | Parameters 251 | ---------- 252 | batch_size: int 253 | How many transitions to sample. 254 | beta: float 255 | To what degree to use importance weights 256 | (0 - no corrections, 1 - full correction) 257 | 258 | Returns 259 | ------- 260 | obs_batch: np.array 261 | batch of observations 262 | act_batch: np.array 263 | batch of actions executed given obs_batch 264 | rew_batch: np.array 265 | rewards received as results of executing act_batch 266 | next_obs_batch: np.array 267 | next set of observations seen after executing act_batch 268 | done_mask: np.array 269 | done_mask[i] = 1 if executing act_batch[i] resulted in 270 | the end of an episode and 0 otherwise. 271 | weights: np.array 272 | Array of shape (batch_size,) and dtype np.float32 273 | denoting importance weight of each sampled transition 274 | idxes: np.array 275 | Array of shape (batch_size,) and dtype np.int32 276 | idexes in buffer of sampled experiences 277 | """ 278 | assert beta > 0 279 | self._num_sampled += batch_size 280 | 281 | idxes = self._sample_proportional(batch_size) 282 | 283 | weights = [] 284 | p_min = self._it_min.min() / self._it_sum.sum() 285 | max_weight = (p_min * len(self._storage))**(-beta) 286 | 287 | for idx in idxes: 288 | p_sample = self._it_sum[idx] / self._it_sum.sum() 289 | weight = (p_sample * len(self._storage))**(-beta) 290 | weights.append(weight / max_weight) 291 | weights = np.array(weights) 292 | encoded_sample = self._encode_sample(idxes) 293 | return tuple(list(encoded_sample) + [weights, idxes]) 294 | 295 | @DeveloperAPI 296 | def update_priorities(self, idxes, priorities): 297 | """Update priorities of sampled transitions. 298 | 299 | sets priority of transition at index idxes[i] in buffer 300 | to priorities[i]. 301 | 302 | Parameters 303 | ---------- 304 | idxes: [int] 305 | List of idxes of sampled transitions 306 | priorities: [float] 307 | List of updated priorities corresponding to 308 | transitions at the sampled idxes denoted by 309 | variable `idxes`. 
310 | """ 311 | assert len(idxes) == len(priorities) 312 | for idx, priority in zip(idxes, priorities): 313 | assert priority > 0 314 | assert 0 <= idx < len(self._storage) 315 | delta = priority**self._alpha - self._it_sum[idx] 316 | self._prio_change_stats.push(delta) 317 | self._it_sum[idx] = priority**self._alpha 318 | self._it_min[idx] = priority**self._alpha 319 | 320 | self._max_priority = max(self._max_priority, priority) 321 | 322 | @DeveloperAPI 323 | def stats(self, debug=False): 324 | parent = ReplayBuffer.stats(self, debug) 325 | if debug: 326 | parent.update(self._prio_change_stats.stats()) 327 | return parent 328 | -------------------------------------------------------------------------------- /devices/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | """ 4 | import warnings 5 | 6 | from devices.devices.nothing import Nothing 7 | from devices.devices.pybulletgui import PyBulletGUI 8 | from devices.devices.spacemouse import SpaceMouse 9 | from devices.devices.xboxcontroller import XBoxController 10 | 11 | # include classes in registry 12 | REGISTRY = {} 13 | for cls in [Nothing, PyBulletGUI, SpaceMouse, XBoxController]: 14 | # noinspection PyBroadException 15 | try: 16 | d = cls() 17 | REGISTRY[d.codename] = cls 18 | REGISTRY[d.clsname] = cls 19 | except Exception as e: 20 | warnings.warn('{} could not be registered due to ' 21 | 'Exception: {}'.format(cls.__name__, e)) 22 | -------------------------------------------------------------------------------- /devices/action.py: -------------------------------------------------------------------------------- 1 | #! usr/bin/env python 2 | """ 3 | 4 | """ 5 | 6 | from threading import Event 7 | 8 | X = 'x' 9 | Y = 'y' 10 | Z = 'z' 11 | RX = 'rx' 12 | RY = 'ry' 13 | RZ = 'rz' 14 | 15 | 16 | class Action: 17 | def __init__(self): 18 | """ 19 | Base action class. 20 | """ 21 | self._action = {} 22 | for name in [X, Y, Z, RX, RY, RZ]: 23 | self._action[name] = 0.0 24 | self._event = Event() 25 | 26 | def await_new(self, timeout=None): 27 | """ 28 | Wait for this device to have new data. 29 | :param timeout: float or None, timeout 30 | :return: bool, True if data acquired before timeout else False 31 | """ 32 | return self._event.wait(timeout) 33 | 34 | @property 35 | def has_new(self): 36 | """ 37 | Get whether this device has new data. 38 | :return: bool, True if new data available else False 39 | """ 40 | return self._event.isSet() 41 | 42 | @property 43 | def x(self): 44 | """ 45 | Action attribute getter. 46 | :return: float 47 | """ 48 | self._event.clear() 49 | return self._action[X] 50 | 51 | @x.setter 52 | def x(self, value): 53 | """ 54 | Action attribute setter. 55 | :param value: float 56 | """ 57 | self._event.set() 58 | self._action[X] = float(value) 59 | 60 | @property 61 | def y(self): 62 | """ 63 | Action attribute getter. 64 | :return: float 65 | """ 66 | self._event.clear() 67 | return self._action[Y] 68 | 69 | @y.setter 70 | def y(self, value): 71 | """ 72 | Action attribute setter. 73 | :param value: float 74 | """ 75 | self._event.set() 76 | self._action[Y] = float(value) 77 | 78 | @property 79 | def z(self): 80 | """ 81 | Action attribute getter. 82 | :return: float 83 | """ 84 | self._event.clear() 85 | return self._action[Z] 86 | 87 | @z.setter 88 | def z(self, value): 89 | """ 90 | Action attribute setter. 
91 | :param value: float 92 | """ 93 | self._event.set() 94 | self._action[Z] = float(value) 95 | 96 | @property 97 | def rx(self): 98 | """ 99 | Action attribute getter. 100 | :return: float 101 | """ 102 | self._event.clear() 103 | return self._action[RX] 104 | 105 | @rx.setter 106 | def rx(self, value): 107 | """ 108 | Action attribute setter. 109 | :param value: float 110 | """ 111 | self._event.set() 112 | self._action[RX] = float(value) 113 | 114 | @property 115 | def ry(self): 116 | """ 117 | Action attribute getter. 118 | :return: float 119 | """ 120 | self._event.clear() 121 | return self._action[RY] 122 | 123 | @ry.setter 124 | def ry(self, value): 125 | """ 126 | Action attribute setter. 127 | :param value: float 128 | """ 129 | self._event.set() 130 | self._action[RY] = float(value) 131 | 132 | @property 133 | def rz(self): 134 | """ 135 | Action attribute getter. 136 | :return: float 137 | """ 138 | self._event.clear() 139 | return self._action[RZ] 140 | 141 | @rz.setter 142 | def rz(self, value): 143 | """ 144 | Action attribute setter. 145 | :param value: float 146 | """ 147 | self._event.set() 148 | self._action[RZ] = float(value) 149 | 150 | @property 151 | def pose(self): 152 | """ 153 | Get pose. 154 | :return: tuple 155 | """ 156 | self._event.clear() 157 | return [self._action[name] for name in [X, Y, Z, RX, RY, RZ]] 158 | 159 | @pose.setter 160 | def pose(self, tup): 161 | """ 162 | Set pose. 163 | :param tup: tuple 164 | """ 165 | for i, name in enumerate([X, Y, Z, RX, RY, RZ]): 166 | self._action[name] = float(tup[i]) 167 | self._event.set() 168 | 169 | @property 170 | def pos(self): 171 | """ 172 | Get position. 173 | :return: tuple 174 | """ 175 | self._event.clear() 176 | return [self._action[name] for name in [X, Y, Z]] 177 | 178 | @pos.setter 179 | def pos(self, tup): 180 | """ 181 | Set position. 182 | :param tup: tuple 183 | """ 184 | for i, name in enumerate([X, Y, Z]): 185 | self._action[name] = float(tup[i]) 186 | self._event.set() 187 | 188 | @property 189 | def orn(self): 190 | """ 191 | Get orientation. 192 | :return: tuple 193 | """ 194 | self._event.clear() 195 | return [self._action[name] for name in [RX, RY, RZ]] 196 | 197 | @orn.setter 198 | def orn(self, tup): 199 | """ 200 | Set orientation. 201 | :param tup: tuple 202 | """ 203 | for i, name in enumerate([RX, RY, RZ]): 204 | self._action[name] = float(tup[i]) 205 | self._event.set() 206 | -------------------------------------------------------------------------------- /devices/callbacks.py: -------------------------------------------------------------------------------- 1 | #! usr/bin/env python 2 | """ 3 | 4 | """ 5 | import inspect 6 | 7 | 8 | def handle(func, *args, **kwargs): 9 | """ 10 | Handles function callbacks, typically on execution of a class method. 11 | :param func: func, callback function 12 | :param args: arguments, optional 13 | :param kwargs: keyword arguments, optional 14 | """ 15 | if func is not None: 16 | try: 17 | return func(*args, **kwargs) 18 | except TypeError: 19 | return func() 20 | return 21 | 22 | 23 | def validate(func, allow_args=False, allow_return=False, require_self=False): 24 | """ 25 | Checks whether a function parameter is valid. The selected rules must be 26 | observed when implementing a function into this framework, however, this 27 | is implementation dependent. 28 | :param func: func, function must be callable or None. 29 | :param allow_args: bool, if False, func may not have input arguments. 
30 | :param allow_return: bool, if False, func may not contain a return statement. 31 | :param require_self: bool, if False, func requires the first argument `self`. 32 | """ 33 | assert callable(func) or func is None, \ 34 | 'function must be callable or None' 35 | if callable(func): 36 | if not allow_args and not require_self: 37 | signature = inspect.signature(func) 38 | assert len(signature.parameters) == 0, \ 39 | 'func may not have inputs arguments' 40 | if not allow_return: 41 | lines, _ = inspect.getsourcelines(func) 42 | assert not lines[-1].lstrip().startswith('return'), \ 43 | 'func may not contain return statement' 44 | if require_self: 45 | signature = inspect.signature(func) 46 | assert next(iter(signature.parameters)) == 'self', \ 47 | 'func requires the first argument `self`' 48 | 49 | 50 | -------------------------------------------------------------------------------- /devices/device.py: -------------------------------------------------------------------------------- 1 | #! usr/bin/env python 2 | """ 3 | 4 | """ 5 | 6 | from abc import ABC 7 | 8 | from devices.action import Action 9 | from devices.pconnect import ConnectPattern 10 | from devices.pdisconnect import DisconnectPattern 11 | from devices.pupdate import UpdatePattern 12 | 13 | 14 | class InputDevice(ConnectPattern, DisconnectPattern, UpdatePattern, Action, ABC): 15 | def __init__(self, pos_scaling=1.0, orn_scaling=1.0): 16 | """ 17 | Initialize input device. The class methods Connect, Disconnect, and 18 | Update are abstract and must be implemented in all device subclasses. 19 | :param pos_scaling: float, scaling applied to input positions 20 | :param orn_scaling: float, scaling applied to input orientations 21 | """ 22 | ConnectPattern.__init__(self) 23 | DisconnectPattern.__init__(self) 24 | UpdatePattern.__init__(self) 25 | Action.__init__(self) 26 | # Initialize properties 27 | self.action = Action() 28 | self.pos_scaling = pos_scaling 29 | self.orn_scaling = orn_scaling 30 | self._device = None 31 | 32 | @property 33 | def clsname(self): 34 | """ 35 | Get the name of this device class. 36 | :return: str 37 | """ 38 | return self.__class__.__name__.lower() 39 | 40 | @property 41 | def codename(self): 42 | """ 43 | Get the codename of this device class. 44 | :return: 45 | """ 46 | raise NotImplementedError 47 | 48 | def start(self): 49 | """ 50 | Start this device. 
51 | """ 52 | if not self._is_connected: 53 | self.connect() 54 | -------------------------------------------------------------------------------- /devices/devices/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/devices/devices/__init__.py -------------------------------------------------------------------------------- /devices/devices/nothing.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | """ 4 | 5 | import random 6 | 7 | from devices.device import InputDevice 8 | 9 | 10 | class Nothing(InputDevice): 11 | def __init__(self, *args, **kwargs): 12 | InputDevice.__init__(self, *args, **kwargs) 13 | self.is_random = True 14 | 15 | @property 16 | def codename(self): 17 | return 'nth' 18 | 19 | def _connect(self): 20 | self._is_connected = True 21 | 22 | def _disconnect(self): 23 | self._is_connected = False 24 | 25 | def _update(self): 26 | action = [] 27 | for i in range(6): 28 | a = random.uniform(-1.0, 1.0) if self.is_random else 0.0 29 | a *= self.pos_scaling if i < 3 else self.orn_scaling 30 | action.append(a) 31 | self.pose = action 32 | -------------------------------------------------------------------------------- /devices/devices/pybulletgui.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | """ 4 | 5 | import pybullet as p 6 | 7 | from devices.device import InputDevice 8 | 9 | 10 | class PyBulletGUI(InputDevice): 11 | def __init__(self, *args, **kwargs): 12 | InputDevice.__init__(self, *args, **kwargs) 13 | 14 | @property 15 | def codename(self): 16 | return 'pbg' 17 | 18 | def _connect(self): 19 | assert p.isConnected(), 'PyBullet not connected.' 20 | self._axis_ids = [] 21 | keys = list(self._action.keys()) 22 | for i in range(6): 23 | self._axis_ids.append( 24 | p.addUserDebugParameter( 25 | paramName=keys[i], 26 | rangeMin=-1, 27 | rangeMax=1, 28 | startValue=0)) 29 | self._is_connected = True 30 | 31 | def _disconnect(self): 32 | pass 33 | 34 | def _update(self): 35 | action = [] 36 | for i in range(6): 37 | value = p.readUserDebugParameter(self._axis_ids[i]) 38 | value *= self.pos_scaling if i < 3 else self.orn_scaling 39 | action.append(value) 40 | self.pose = action 41 | 42 | 43 | if __name__ == '__main__': 44 | 45 | def printout(s): 46 | print(s.pose) 47 | 48 | 49 | s = PyBulletGUI() 50 | s.on_update = printout 51 | import time 52 | 53 | while True: 54 | time.sleep(1) 55 | -------------------------------------------------------------------------------- /devices/devices/spacemouse.py: -------------------------------------------------------------------------------- 1 | #! usr/bin/env python 2 | """ 3 | 4 | """ 5 | 6 | import pygame 7 | from pygame.constants import JOYAXISMOTION 8 | 9 | from devices.device import InputDevice 10 | 11 | 12 | class SpaceMouse(InputDevice): 13 | def __init__(self, *args, **kwargs): 14 | InputDevice.__init__(self, *args, **kwargs) 15 | 16 | @property 17 | def codename(self): 18 | return 'spm' 19 | 20 | def _connect(self): 21 | pygame.init() 22 | pygame.joystick.init() 23 | assert pygame.joystick.get_count() > 0, \ 24 | 'No joystick found!' 
25 | self._device = pygame.joystick.Joystick(0) 26 | self._device.init() 27 | self._is_connected = True 28 | 29 | def _disconnect(self, *args, **kwargs): 30 | pass 31 | 32 | def _update(self): 33 | action = [] 34 | for event in pygame.event.get(): 35 | if event.type == JOYAXISMOTION: 36 | action = [self._device.get_axis(i) for i in range(6)] 37 | if len(action) == 0: 38 | return 39 | for i in range(6): 40 | action[i] *= self.pos_scaling if i < 3 else self.orn_scaling 41 | action[0] = -action[0] # x direction is positive 42 | self.pose = tuple(action) 43 | 44 | 45 | if __name__ == '__main__': 46 | 47 | def printout(s): 48 | print(s.pose) 49 | 50 | 51 | s = SpaceMouse() 52 | s.start() 53 | s.on_update = printout 54 | import time 55 | 56 | while True: 57 | time.sleep(1) 58 | -------------------------------------------------------------------------------- /devices/devices/xboxcontroller.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | """ 4 | 5 | import pygame 6 | from pygame.locals import * 7 | 8 | from devices.device import InputDevice 9 | 10 | 11 | class XBoxController(InputDevice): 12 | def __init__(self, *args, **kwargs): 13 | InputDevice.__init__(self, *args, **kwargs) 14 | 15 | @property 16 | def codename(self): 17 | return 'xbc' 18 | 19 | def _connect(self): 20 | pygame.init() 21 | pygame.joystick.init() 22 | assert pygame.joystick.get_count() > 0, \ 23 | 'No joystick found!' 24 | self._device = pygame.joystick.Joystick(0) 25 | self._device.init() 26 | self._z_directions = [1, 1] 27 | self._is_connected = True 28 | 29 | def _disconnect(self): 30 | pass 31 | 32 | def _update(self): 33 | action = [] 34 | for event in pygame.event.get(): 35 | if event.type == JOYBUTTONUP or event.type == JOYBUTTONDOWN: 36 | button_state = [self._device.get_button(i) for i in range(15)] 37 | self._z_directions[0] *= -1 if button_state[8] else 1 38 | self._z_directions[1] *= -1 if button_state[9] else 1 39 | if event.type == JOYAXISMOTION: 40 | action = [self._device.get_axis(i) for i in range(6)] 41 | x = action[0] 42 | y = action[1] * -1 43 | z = (action[4] + 1) / 2 * self._z_directions[0] 44 | rx = action[2] 45 | ry = action[3] * -1 46 | rz = (action[5] + 1) / 2 * self._z_directions[1] 47 | action = [x, y, z, rx, ry, rz] 48 | if len(action) == 0: 49 | return 50 | for i in range(6): 51 | action[i] = 0.0 if abs(action[i]) < 0.001 else action[i] # hpf 52 | action[i] *= self.pos_scaling if i < 3 else self.orn_scaling 53 | action[i] = round(action[i], 5) 54 | self.pose = action 55 | 56 | 57 | if __name__ == '__main__': 58 | 59 | def printout(s): 60 | print(s.pose) 61 | 62 | 63 | s = XBoxController() 64 | s.start() 65 | s.on_update = printout 66 | import time 67 | 68 | while True: 69 | time.sleep(1) 70 | -------------------------------------------------------------------------------- /devices/pconnect.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from devices.callbacks import handle, validate 4 | 5 | 6 | class ConnectPattern(ABC): 7 | def __init__(self): 8 | self._on_connect = None 9 | if '_is_connected' not in self.__dict__: 10 | self._is_connected = False 11 | 12 | @property 13 | def is_connected(self): 14 | """ 15 | Get connection state. 16 | :return: bool, True if connected else False. 17 | """ 18 | return self._is_connected 19 | 20 | def connect(self, *args, **kwargs): 21 | """ 22 | Connect method. 
23 | :param args: arguments 24 | :param kwargs: keyword arguments 25 | :return: bool, result 26 | """ 27 | result = self._connect(*args, **kwargs) 28 | self._is_connected = result 29 | handle(self._on_connect, self) 30 | return result 31 | 32 | @abstractmethod 33 | def _connect(self, *args, **kwargs): 34 | """ 35 | Called by public method. 36 | :param args: arguments 37 | :param kwargs: keyword arguments 38 | :return: bool, result 39 | """ 40 | pass 41 | 42 | @property 43 | def on_connect(self): 44 | """ 45 | Get callback. 46 | :return: func, callback 47 | """ 48 | return self._on_connect 49 | 50 | @on_connect.setter 51 | def on_connect(self, func): 52 | """ 53 | Set callback. 54 | :param func: func, callback 55 | """ 56 | validate(func, allow_args=True, allow_return=True) 57 | self._on_connect = func 58 | -------------------------------------------------------------------------------- /devices/pdisconnect.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from devices.callbacks import handle, validate 4 | 5 | 6 | class DisconnectPattern(ABC): 7 | def __init__(self): 8 | self._on_disconnect = None 9 | if '_is_connected' not in self.__dict__: 10 | self._is_connected = False 11 | 12 | def disconnect(self, *args, **kwargs): 13 | """ 14 | Disconnect method. 15 | :param args: arguments 16 | :param kwargs: keyword arguments 17 | :return: bool, result 18 | """ 19 | result = self._disconnect(*args, **kwargs) 20 | self._is_connected = result 21 | handle(self._on_disconnect, self) 22 | return result 23 | 24 | @abstractmethod 25 | def _disconnect(self, *args, **kwargs): 26 | """ 27 | Called by public method. 28 | :param args: arguments 29 | :param kwargs: keyword arguments 30 | :return: bool, result 31 | """ 32 | pass 33 | 34 | @property 35 | def on_disconnect(self): 36 | """ 37 | Get callback. 38 | :return: func, callback 39 | """ 40 | return self._on_disconnect 41 | 42 | @on_disconnect.setter 43 | def on_disconnect(self, func): 44 | """ 45 | Set callback. 46 | :param func: func, callback 47 | """ 48 | validate(func, allow_args=True, allow_return=True) 49 | self._on_disconnect = func -------------------------------------------------------------------------------- /devices/pupdate.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from devices.callbacks import handle, validate 4 | 5 | 6 | class UpdatePattern(ABC): 7 | def __init__(self): 8 | self._on_update = None 9 | 10 | def update(self, *args, **kwargs): 11 | """ 12 | Update method. 13 | :param args: arguments 14 | :param kwargs: keyword arguments 15 | :return: bool, result 16 | """ 17 | result = self._update(*args, **kwargs) 18 | handle(self._on_update, self) 19 | return result 20 | 21 | @abstractmethod 22 | def _update(self, *args, **kwargs): 23 | """ 24 | Called by public method. 25 | :param args: arguments 26 | :param kwargs: keyword arguments 27 | :return: bool, result 28 | """ 29 | pass 30 | 31 | @property 32 | def on_update(self): 33 | """ 34 | Get callback. 35 | :return: func, callback 36 | """ 37 | return self._on_update 38 | 39 | @on_update.setter 40 | def on_update(self, func): 41 | """ 42 | Set callback. 
43 | :param func: func, callback 44 | """ 45 | validate(func, allow_args=True, allow_return=True) 46 | self._on_update = func 47 | -------------------------------------------------------------------------------- /envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/__init__.py -------------------------------------------------------------------------------- /envs/robot_real_example.py: -------------------------------------------------------------------------------- 1 | import utilities as util 2 | 3 | # modules for interfacing with robot control and FT sensor 4 | # from robot_control_interface import ControlInterface 5 | # from ft_sensor_interface import FTInterface 6 | 7 | 8 | class RobotRealExample(): 9 | 10 | def __init__(self): 11 | 12 | # Connect the interfaces 13 | self.robot_interface = ControlInterface() 14 | self.ft_interface = FTInterface() 15 | 16 | @staticmethod 17 | def decompose_incoming_pose_data(data): 18 | position = data[:3] 19 | rotation = data[3:7] 20 | 21 | return [position, rotation] 22 | 23 | def get_member_pose(self): 24 | self.robot_interface.receive() 25 | data_in = self.robot_interface.message_in.values 26 | values = self.decompose_incoming_pose_data(data_in) 27 | position_m = values[0] 28 | rotation_quat = values[1] 29 | 30 | return [position_m, rotation_quat] 31 | 32 | @staticmethod 33 | def get_target_pose(): 34 | # target at world origin 35 | return [0, 0, 0], util.xyzw_by_euler([0, 0, 0], 'sxyz') 36 | 37 | def get_force_torque(self): 38 | self.ft_interface.receive() 39 | data_in = self.ft_interface.message_in.values 40 | force_torque = data_in 41 | 42 | return force_torque 43 | 44 | def apply_action_pose(self, delta, done): 45 | relative_pos = delta[0:3] 46 | relative_orn = delta[3:6] 47 | 48 | data_out = list(relative_pos) + list(relative_orn) + [done] 49 | 50 | self.robot_interface.send(data_out) 51 | 52 | def apply_action_position(self, delta, done): 53 | data_out = list(delta) + [0, 0, 0] + [done] 54 | 55 | self.robot_interface.send(data_out) 56 | -------------------------------------------------------------------------------- /envs/robot_sim_panda.py: -------------------------------------------------------------------------------- 1 | import math 2 | import pybullet as p 3 | import numpy as np 4 | import utilities as util 5 | 6 | INITIAL_POS = [0.0, 0.0, 0.0] 7 | INITIAL_ORN = [0, 0, 0, 1] 8 | TARGET_POS = [0.6, 0.044, 0] 9 | TARGET_ORN = [0, 0, 0, 1] 10 | HOLE_OFFSET = [0.048485, -0.04305, -0.03] 11 | 12 | URDF_PATH_TOOL = 'envs/urdf/panda_peg_in_hole/panda_peg' 13 | URDF_PATH_TARGET = 'envs/urdf/panda_peg_in_hole/insertion_box' 14 | 15 | # Official initial positions for Panda [0, -45, 0, -135, 0, 90, 45] 16 | INITIAL_PANDA_JOINTS = [math.radians(0), math.radians(20), math.radians(0), math.radians(-103), math.radians(0), 17 | math.radians(122), math.radians(45)] 18 | 19 | 20 | class RobotSimPanda(): 21 | 22 | def __init__(self): 23 | 24 | self.uid = p.loadURDF(util.format_urdf_filepath(URDF_PATH_TOOL), 25 | basePosition=INITIAL_POS, 26 | baseOrientation=INITIAL_ORN, 27 | useFixedBase=1) 28 | 29 | self.target_uid = p.loadURDF(util.format_urdf_filepath(URDF_PATH_TARGET), 30 | basePosition=TARGET_POS, 31 | baseOrientation=TARGET_ORN, 32 | useFixedBase=1) 33 | self.link_target = 0 34 | util.display_frame_axis(self.target_uid, self.link_target) 35 | 36 | # In urdf, the panda robot has 11 joints 37 
| # 7 joints belong to the arm, each with a FT sensor 38 | self.num_arm_joints = 7 39 | 40 | # FT sensor at the last arm joint 41 | self.link_sensor = 6 42 | util.display_frame_axis(self.uid, self.link_sensor) 43 | 44 | # The 8th link: a virtual link at the end of the arm, as the end effector 45 | self.link_ee = 7 46 | util.display_frame_axis(self.uid, self.link_ee) 47 | 48 | self.link_member = 8 49 | util.display_frame_axis(self.uid, self.link_member) 50 | 51 | self.max_force = 200 52 | self.max_velocity = 0.35 53 | 54 | self.joint_positions = INITIAL_PANDA_JOINTS 55 | 56 | # Apply the joint positions 57 | for jointIndex in range(self.num_arm_joints): 58 | p.resetJointState(self.uid, jointIndex, self.joint_positions[jointIndex]) 59 | p.setJointMotorControl2(self.uid, jointIndex, p.POSITION_CONTROL, 60 | targetPosition=self.joint_positions[jointIndex], force=self.max_force, 61 | maxVelocity=self.max_velocity, positionGain=0.3, velocityGain=1) 62 | 63 | # Important to call getLinkState only once at the beginning, otherwise PyBullet bug makes the robot unstable. 64 | ee_pose = p.getLinkState(self.uid, self.link_ee) 65 | self.ee_position = list(ee_pose[0]) 66 | self.ee_orientation = list(ee_pose[1]) 67 | 68 | def get_member_pose(self): 69 | link_member_pose = p.getLinkState(self.uid, self.link_member) 70 | return [link_member_pose[0], link_member_pose[1]] 71 | 72 | def get_target_pose(self): 73 | pose = p.getLinkState(self.target_uid, self.link_target)[4:6] 74 | # Offset the hole center from corner 75 | return [(np.array(pose[0]) + HOLE_OFFSET).tolist(), pose[1]] 76 | 77 | def enable_force_torque_sensor(self): 78 | # FT sensor is measured at the center of mass in Pybullet 79 | p.enableJointForceTorqueSensor(self.uid, self.link_sensor) 80 | 81 | def get_force_torque(self): 82 | # FT reading in pybullet is negated and needs to be scaled down 83 | ft = np.multiply(-0.1, p.getJointState(self.uid, self.link_sensor)[2]).tolist() 84 | return ft 85 | 86 | def apply_action_pose(self, delta_pose): 87 | for i in range(3): 88 | self.ee_position[i] += delta_pose[i] 89 | 90 | orn = list(p.getEulerFromQuaternion(self.ee_orientation)) 91 | for i in range(3): 92 | orn[i] += delta_pose[i+3] 93 | self.ee_orientation = p.getQuaternionFromEuler(orn) 94 | 95 | joint_positions = p.calculateInverseKinematics(self.uid, self.num_arm_joints, self.ee_position, self.ee_orientation) 96 | 97 | for i in range(self.num_arm_joints): 98 | p.setJointMotorControl2(bodyUniqueId=self.uid, jointIndex=i, controlMode=p.POSITION_CONTROL, 99 | targetPosition=joint_positions[i], targetVelocity=0, force=self.max_force, 100 | maxVelocity=self.max_velocity, positionGain=0.3, velocityGain=1) 101 | 102 | def apply_action_position(self, delta_pos): 103 | for i in range(3): 104 | self.ee_position[i] += delta_pos[i] 105 | 106 | joint_positions = p.calculateInverseKinematics(self.uid, self.num_arm_joints, self.ee_position, self.ee_orientation) 107 | 108 | for i in range(self.num_arm_joints): 109 | p.setJointMotorControl2(bodyUniqueId=self.uid, jointIndex=i, controlMode=p.POSITION_CONTROL, 110 | targetPosition=joint_positions[i], targetVelocity=0, force=self.max_force, 111 | maxVelocity=self.max_velocity, positionGain=0.3, velocityGain=1) 112 | 113 | 114 | -------------------------------------------------------------------------------- /envs/robot_sim_robotless.py: -------------------------------------------------------------------------------- 1 | import math 2 | import pybullet as p 3 | import numpy as np 4 | import transforms3d 5 | import 
utilities as util 6 | 7 | # lap joint task 8 | INITIAL_POS = np.array([0.0, 0.0, 0.24]) 9 | INITIAL_ORN = util.mat33_to_quat(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])) 10 | TARGET_POS = np.array([0, 0, 0]) 11 | TARGET_ORN = np.array([0, 0, math.pi]) 12 | URDF_PATH_TOOL = 'envs/urdf/robotless_lap_joint/tool' 13 | URDF_PATH_TARGET = 'envs/urdf/robotless_lap_joint/task_lap_90deg' 14 | 15 | 16 | class RobotSimRobotless(): 17 | 18 | def __init__(self): 19 | 20 | self.uid = p.loadURDF(util.format_urdf_filepath(URDF_PATH_TOOL), 21 | basePosition=INITIAL_POS, 22 | baseOrientation=INITIAL_ORN, 23 | useFixedBase=0) 24 | self.link_member = 2 25 | self.link_gripper = 1 26 | self.link_sensor = 0 27 | 28 | self.target_uid = p.loadURDF(util.format_urdf_filepath(URDF_PATH_TARGET), 29 | basePosition=TARGET_POS, 30 | baseOrientation=util.xyzw_by_euler(TARGET_ORN, 'sxyz'), 31 | useFixedBase=1) 32 | self.link_target = 0 33 | util.display_frame_axis(self.target_uid, self.link_target) 34 | 35 | self.max_force = -1 36 | 37 | # set friction 38 | """ 39 | p.changeDynamics(self.uid, 40 | self.link_member, 41 | lateralFriction=1.0) 42 | p.changeDynamics(self.target_uid, 43 | 1, 44 | lateralFriction=1.0) 45 | """ 46 | 47 | self.base_constraint = p.createConstraint( 48 | parentBodyUniqueId=self.uid, 49 | parentLinkIndex=-1, # base index 50 | childBodyUniqueId=-1, # base index 51 | childLinkIndex=-1, # base index 52 | jointType=p.JOINT_FIXED, 53 | jointAxis=[0, 0, 0], 54 | parentFramePosition=[0, 0, 0], 55 | childFramePosition=INITIAL_POS, 56 | childFrameOrientation=INITIAL_ORN 57 | ) 58 | 59 | self.base_pose = p.getBasePositionAndOrientation(self.uid) 60 | self.gripper_pose = p.getLinkState(self.uid, self.link_gripper)[4:6] 61 | self.member_pose = p.getLinkState(self.uid, self.link_member)[4:6] 62 | 63 | self.from_base_to_member = util.get_f1_to_f2_xform(self.base_pose, self.member_pose) 64 | 65 | for link_index in range(p.getNumJoints(self.uid)): 66 | util.display_frame_axis(self.uid, link_index) 67 | 68 | def get_member_pose(self): 69 | base_pose = p.getBasePositionAndOrientation(self.uid) 70 | member_pose_mat = util.transform_mat(self.from_base_to_member, 71 | util.mat44_by_pos_quat(base_pose[0], base_pose[1])) 72 | member_pose = util.mat44_to_pos_quat(member_pose_mat) 73 | 74 | return [member_pose[0], member_pose[1]] 75 | 76 | def get_target_pose(self): 77 | return p.getLinkState(self.target_uid, self.link_target)[4:6] 78 | 79 | def enable_force_torque_sensor(self): 80 | # FT sensor is measured at the center of mass in Pybullet 81 | p.enableJointForceTorqueSensor(self.uid, self.link_sensor) 82 | 83 | def get_force_torque(self): 84 | # FT reading in pybullet is negated and needs to be scaled down 85 | return np.multiply(-0.1, p.getJointState(self.uid, self.link_sensor)[2]).tolist() 86 | 87 | def apply_action_pose(self, delta_pose): 88 | # pybullet oddity: 89 | # mysterious 5-time relationship between commanded velocity and resulting velocity, when using the changeConstraint() method 90 | delta_pose = np.multiply(np.array(delta_pose), 5.0) 91 | 92 | relative_pos = np.array(delta_pose[0:3]) 93 | base_pos, base_orn = p.getBasePositionAndOrientation(self.uid) 94 | new_pos = np.add(np.array(base_pos), relative_pos).tolist() 95 | 96 | ang_vel_quat = [0, delta_pose[3], delta_pose[4], delta_pose[5]] 97 | new_orn = np.add(base_orn, np.multiply(0.5, 98 | util.wxyz_to_xyzw(transforms3d.quaternions.qmult(ang_vel_quat, util.xyzw_to_wxyz(base_orn))))) 99 | 100 | p.changeConstraint(self.base_constraint, new_pos, new_orn, 
self.max_force) 101 | 102 | def apply_action_position(self, delta_pos): 103 | # pybullet oddity: 104 | # mysterious 5-time relationship between commanded velocity and resulting velocity, when using the changeConstraint() method 105 | delta_pos = np.multiply(np.array(delta_pos), 5.0) 106 | 107 | base_pos, base_orn = p.getBasePositionAndOrientation(self.uid) 108 | new_pos = np.add(np.array(base_pos), delta_pos).tolist() 109 | 110 | p.changeConstraint(self.base_constraint, new_pos, INITIAL_ORN, self.max_force) 111 | 112 | 113 | -------------------------------------------------------------------------------- /envs/task.py: -------------------------------------------------------------------------------- 1 | import math 2 | from abc import ABC, abstractmethod 3 | 4 | import gym 5 | import numpy as np 6 | from gym import spaces 7 | 8 | import utilities as util 9 | 10 | WRITE_CSV = False 11 | 12 | 13 | class Task(ABC, gym.Env): 14 | 15 | def __init__(self, 16 | time_step=None, 17 | max_steps=None, 18 | step_limit=None, 19 | action_dim=None, 20 | max_vel=None, 21 | max_rad=None, 22 | ft_obs_only=None, 23 | limit_ft=None, 24 | max_ft=None, 25 | max_position_range=None, 26 | dist_threshold=None): 27 | super().__init__() 28 | 29 | self._max_step = max_steps 30 | self._step_limit = step_limit 31 | # max linear and rotational velocity command 32 | self._max_vel = max_vel 33 | self._max_rad = max_rad 34 | # only use force torque as observation 35 | self._ft_obs_only = ft_obs_only 36 | 37 | self._time_step = time_step 38 | self._observation = [] 39 | self._env_step_counter = 0 40 | self._num_success = 0 41 | 42 | self._limit_force_torque = limit_ft 43 | self._max_force_torque = max_ft 44 | self._force_torque_violations = [0.0] * len(self._max_force_torque) 45 | self._ft_range_ratio = 1 46 | 47 | """ Define Gym Spaces for observations and actions """ 48 | self._max_pos_range = max_position_range 49 | if self._ft_obs_only: # no pose observation 50 | self.observation_dim = len(self._max_force_torque) 51 | observation_high = np.array(self._max_force_torque) 52 | observation_low = - observation_high 53 | elif action_dim == 6: # 6 DOF 54 | self.observation_dim = 7 + len(self._max_force_torque) 55 | observation_orn_high = [1] * 4 56 | observation_high = np.array(self._max_pos_range + observation_orn_high + self._max_force_torque) 57 | observation_low = -observation_high 58 | else: # 3 DOF 59 | self.observation_dim = 3 + len(self._max_force_torque) 60 | observation_high = np.array(self._max_pos_range + self._max_force_torque) 61 | observation_low = - observation_high 62 | self.observation_space = spaces.Box(observation_low, observation_high) 63 | 64 | self._action_bound = 1 65 | action_high = np.array([self._action_bound] * action_dim) 66 | self.action_space = spaces.Box(-action_high, action_high) 67 | self.action_dim = action_dim 68 | 69 | self.member_pose = [] 70 | self.force_torque = [] 71 | 72 | self.dist_threshold = dist_threshold 73 | 74 | # csv headers 75 | if WRITE_CSV: 76 | util.write_csv(["step_member_pose", "pos_X", "pos_Y", "pos_Z", "qX", "qY", "qZ", "qW"], 'member_pose.csv', 77 | True) 78 | util.write_csv(["step_ft", "Fx", "Fy", "Fz", "Tx", "Ty", "Tz"], 'ft_reading.csv', True) 79 | if self.action_dim == 3: 80 | util.write_csv(["step_actions", "vel_X", "vel_Y", "vel_Z"], 'data_out.csv', True) 81 | else: 82 | util.write_csv(["step_actions", "vel_X", "vel_Y", "vel_Z", "rot_vel_X", "rot_vel_Y", "rot_vel_Z"], 83 | 'data_out.csv', True) 84 | 85 | @abstractmethod 86 | def reset(self): 87 | pass 88 | 89 | 
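# --- Added note (not part of the original file) ---
# Observation layouts implied by the branching above, assuming a 6-axis
# force/torque reading (len(max_ft) == 6):
#   ft_obs_only         -> [Fx, Fy, Fz, Tx, Ty, Tz]                 (6 values)
#   action_dim == 6     -> [x, y, z, qx, qy, qz, qw, Fx, ..., Tz]   (7 + 6 = 13 values)
#   otherwise (3 DOF)   -> [x, y, z, Fx, ..., Tz]                   (3 + 6 = 9 values)
# The pose terms come from get_member_pose() and the force/torque terms from
# get_force_torque(), assembled in get_extended_observation() below.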
def pos_dist_to_target(self): 90 | self.member_pose = self.get_member_pose() 91 | 92 | # log 93 | if WRITE_CSV: 94 | util.write_csv([self._env_step_counter] + self.member_pose[0] + self.member_pose[1], 'member_pose.csv', 95 | False) 96 | 97 | member_pos = list(self.member_pose[0]) 98 | target_pose = self.get_target_pose() 99 | target_pos = list(target_pose[0]) 100 | 101 | dist_pos = np.linalg.norm(np.subtract(member_pos, target_pos)) # linear dist in m 102 | # util.prGreen("pos dist: {}".format(dist_pos)) 103 | 104 | return dist_pos 105 | 106 | def orn_dist_to_target(self): 107 | member_orn = list(self.member_pose[1]) 108 | target_pose = self.get_target_pose() 109 | target_orn = list(target_pose[1]) 110 | 111 | dist_orn = math.fabs(2 * math.acos(math.fabs(np.dot(member_orn, target_orn))) - math.pi) # angle diff in rad 112 | # util.prGreen("orn dist: {}".format(dist_orn)) 113 | 114 | return dist_orn 115 | 116 | def dist_to_target(self): 117 | dist_pos = self.pos_dist_to_target() 118 | dist = dist_pos 119 | 120 | if self.action_dim > 3: 121 | dist_orn = self.orn_dist_to_target() 122 | dist = dist_pos + 0.05 * dist_orn 123 | 124 | # util.prGreen("dist: {}".format(dist)) 125 | return dist 126 | 127 | def get_extended_observation(self): 128 | self._observation = [] 129 | 130 | if not self._ft_obs_only: 131 | if self.action_dim > 3: 132 | pos, orn = self.member_pose[0], self.member_pose[1] 133 | self._observation.extend(pos) 134 | self._observation.extend(orn) 135 | else: 136 | pos = self.member_pose[0] 137 | self._observation.extend(pos) 138 | 139 | self.force_torque = self.get_force_torque() 140 | 141 | if WRITE_CSV: 142 | util.write_csv([self._env_step_counter] + self.force_torque, 'ft_reading.csv', False) 143 | 144 | if self._limit_force_torque: 145 | self.check_ft_limit(self.force_torque) 146 | # if self._force_torque_violations != [0]*len(self.force_torque): 147 | # util.prRed(self._force_torque_violations) 148 | 149 | self._observation.extend(self.force_torque) 150 | 151 | return self._observation 152 | 153 | def step(self, action): 154 | if len(action) > 3: 155 | delta_lin = np.array(action[0:3]) * self._max_vel * self._time_step 156 | delta_rot = np.array(action[3:6]) * self._max_rad * self._time_step 157 | delta = np.append(delta_lin, delta_rot) 158 | else: 159 | delta = np.array(action) * self._max_vel * self._time_step 160 | 161 | if self._limit_force_torque: 162 | self.constrain_velocity_for_ft(delta) 163 | 164 | if WRITE_CSV: 165 | util.write_csv([self._env_step_counter] + list(delta), 'data_out.csv', False) 166 | 167 | return self.step2(delta) 168 | 169 | # To be overridden 170 | def step2(self, delta): 171 | self._env_step_counter += 1 172 | reward, done, num_success = self.reward() 173 | self._observation = self.get_extended_observation() 174 | 175 | return np.array(self._observation), reward, done, {"num_success": num_success} 176 | 177 | def reward(self): 178 | 179 | done = False 180 | 181 | dist = self.dist_to_target() 182 | reward_dist = - dist 183 | 184 | reward_ft = 0 185 | # reward_ft = self.reward_force_torque() 186 | reward = reward_dist + 0.05 * reward_ft 187 | # util.prRed(reward) 188 | 189 | if dist < self.dist_threshold: 190 | done = True 191 | reward += 1000 192 | util.prGreen("SUCCESS with " + str(self._env_step_counter) + " steps") 193 | self._num_success = 1 194 | 195 | if self._step_limit and self._env_step_counter > self._max_step: 196 | done = True 197 | util.prRed("FAILED") 198 | self._num_success = 0 199 | 200 | return reward, done, self._num_success 201 | 
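# --- Added note (not part of the original file) ---
# Worked example of the action scaling in step() above, using illustrative
# (not project-tuned) settings max_vel = 0.01 m/s, max_rad = 0.01 rad/s and
# time_step = 0.01 s: a normalized action of +0.5 on the x axis commands a
# Cartesian displacement of
#     delta_x = 0.5 * 0.01 * 0.01 = 5e-05 m
# for that environment step. reward() above then returns the negative of the
# combined distance (position distance, plus 0.05 * orientation distance when
# action_dim > 3), adds a +1000 bonus once the distance drops below
# dist_threshold, and ends the episode on success or, when step limiting is
# enabled, once the step counter exceeds max_steps.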
202 | """ Rewards and constraints based on FT limits """ 203 | 204 | def reward_force_torque(self): 205 | 206 | ft_contact_limit = np.multiply(self._max_force_torque, 0.5) # define contact limit ft as % of max 207 | # util.prGreen('force torque: {}'.format(self.force_torque)) 208 | indices = Task.check_list_bounds(self.force_torque, ft_contact_limit) 209 | # util.prGreen(indices) 210 | 211 | max_ft = 0 212 | for i in range(len(indices)): 213 | ft_excess_ratio = (indices[i] * self.force_torque[i] - ft_contact_limit[i]) / self._max_force_torque[i] 214 | if (indices[i] != 0) & (max_ft < ft_excess_ratio): 215 | max_ft = ft_excess_ratio 216 | reward_ft = - max_ft 217 | 218 | # experiment with discouraging FT overload with negative reward 219 | if self._limit_force_torque & (self._force_torque_violations != [0.0] * len(self._force_torque_violations)): 220 | reward_ft -= 5 221 | 222 | # util.prRed('reward ft: {}'.format(reward_ft)) 223 | return reward_ft 224 | 225 | @staticmethod 226 | def check_list_bounds(l, l_bounds): 227 | assert len(l) == len(l_bounds) 228 | index_list = [0] * len(l) 229 | for i in range(len(l)): 230 | if math.fabs(l[i]) >= l_bounds[i]: 231 | # util.prGreen("index {}: {}".format(i,l[i])) 232 | index_list[i] = np.sign(l[i]) 233 | 234 | # in the form of [0, 0, 1, 0, -1, 0] 235 | return index_list 236 | 237 | def check_ft_limit(self, force_torque): 238 | # check force torque against limits 239 | self._force_torque_violations = Task.check_list_bounds(force_torque, 240 | np.multiply(self._ft_range_ratio, 241 | self._max_force_torque)) 242 | 243 | def constrain_velocity_for_ft(self, velocity): 244 | # stop (only) linear motion along the direction where force torque limit is violated 245 | 246 | force_list = self._force_torque_violations[0:3] 247 | torque_list = self._force_torque_violations[3:6] 248 | lin_vel = velocity[0:3] 249 | axes = ['x', 'y', 'z'] 250 | 251 | for i in range(len(force_list)): 252 | if (force_list[i] != 0) & (np.sign(force_list[i]) != np.sign(lin_vel[i])): 253 | lin_vel[i] = -0.1 * lin_vel[i] 254 | 255 | for i in range(len(torque_list)): 256 | if torque_list[i] != 0: 257 | for j in range(len(torque_list)): 258 | if j != i: 259 | lin_vel[j] = 0.0 260 | 261 | velocity[0:3] = lin_vel 262 | 263 | return velocity 264 | -------------------------------------------------------------------------------- /envs/task_real.py: -------------------------------------------------------------------------------- 1 | # Task setup for working with the real robot and sensor 2 | 3 | import numpy as np 4 | 5 | from envs.task import Task 6 | 7 | 8 | class TaskReal(Task): 9 | 10 | def __init__(self, 11 | env_robot=None, 12 | time_step=None, 13 | max_steps=None, 14 | step_limit=None, 15 | action_dim=None, 16 | max_vel=None, 17 | max_rad=None, 18 | ft_obs_only=None, 19 | limit_ft=None, 20 | max_ft=None, 21 | max_position_range=None, 22 | dist_threshold=None): 23 | 24 | super().__init__(max_steps=max_steps, 25 | action_dim=action_dim, 26 | step_limit=step_limit, 27 | max_vel=max_vel, 28 | max_rad=max_rad, 29 | ft_obs_only=ft_obs_only, 30 | limit_ft=limit_ft, 31 | time_step=time_step, 32 | max_ft=max_ft, 33 | max_position_range=max_position_range, 34 | dist_threshold=dist_threshold) 35 | 36 | self.env = env_robot() 37 | 38 | def reset(self): 39 | self.max_dist = self.dist_to_target() 40 | self._env_step_counter = 0 41 | self._observation = self.get_extended_observation() 42 | 43 | return np.array(self._observation) 44 | 45 | def get_member_pose(self): 46 | return self.env.get_member_pose() 
47 | 48 | def get_target_pose(self): 49 | return self.env.get_target_pose() 50 | 51 | def get_force_torque(self): 52 | return self.env.get_force_torque() 53 | 54 | def step2(self, delta): 55 | reward, done, num_success = self.reward() 56 | 57 | if done: 58 | if self.action_dim > 3: 59 | last_delta = [0.0] * 6 60 | self.env.apply_action_pose(last_delta, 1) 61 | else: 62 | last_delta = [0.0] * 3 63 | self.env.apply_action_position(last_delta, 1) 64 | else: 65 | if self.action_dim > 3: 66 | self.env.apply_action_pose(delta, 0) 67 | else: 68 | self.env.apply_action_position(delta, 0) 69 | 70 | self._env_step_counter += 1 71 | 72 | self._observation = self.get_extended_observation() 73 | 74 | return np.array(self._observation), reward, done, {"num_success": num_success} 75 | -------------------------------------------------------------------------------- /envs/task_sim.py: -------------------------------------------------------------------------------- 1 | import pybullet as p 2 | import time 3 | 4 | import numpy as np 5 | from gym.utils import seeding 6 | 7 | from envs.task import Task 8 | 9 | 10 | class TaskSim(Task): 11 | 12 | def __init__(self, 13 | env_robot=None, 14 | self_collision_enabled=None, 15 | renders=None, 16 | ft_noise=None, 17 | pose_noise=None, 18 | action_noise=None, 19 | physical_noise=None, 20 | time_step=None, 21 | max_steps=None, 22 | step_limit=None, 23 | action_dim=None, 24 | max_vel=None, 25 | max_rad=None, 26 | ft_obs_only=None, 27 | limit_ft=None, 28 | max_ft=None, 29 | max_position_range=None, 30 | dist_threshold=None): 31 | 32 | super().__init__(max_steps=max_steps, 33 | action_dim=action_dim, 34 | step_limit=step_limit, 35 | max_vel=max_vel, 36 | max_rad=max_rad, 37 | ft_obs_only=ft_obs_only, 38 | limit_ft=limit_ft, 39 | time_step=time_step, 40 | max_ft=max_ft, 41 | max_position_range=max_position_range, 42 | dist_threshold=dist_threshold) 43 | 44 | self._env_robot = env_robot 45 | self._self_collision_enabled = self_collision_enabled 46 | self._renders = renders 47 | 48 | """ parameters to control the level of domain randomization """ 49 | self._ft_noise = ft_noise 50 | self._ft_noise_level = [0.5, 0.5, 0.5, 0.05, 0.05, 0.05] # N N N Nm Nm Nm 51 | self._ft_bias_level = [2.0, 2.0, 2.0, 0.2, 0.2, 0.2] # N N N Nm Nm Nm 52 | self._ft_bias = 0.0 53 | self._pose_noise = pose_noise 54 | self._pos_noise_level = 0.001 # m 55 | self._orn_noise_level = 0.001 # rad 56 | self._action_noise = action_noise 57 | self._action_noise_lin = 0.001 # multiplier for linear translation 58 | self._action_noise_rot = 0.001 # multiplier for rotation 59 | self._physical_noise = physical_noise 60 | self._friction_noise_level = 0.1 61 | 62 | if self._renders: 63 | cid = p.connect(p.SHARED_MEMORY) 64 | if (cid < 0): 65 | cid = p.connect(p.GUI) 66 | p.resetDebugVisualizerCamera(0.6, 180, -41, [0, 0, 0]) 67 | else: 68 | p.connect(p.DIRECT) 69 | 70 | # p.configureDebugVisualizer(p.COV_ENABLE_GUI, 0) 71 | 72 | self.seed() 73 | 74 | def reset(self): 75 | p.resetSimulation() 76 | p.setPhysicsEngineParameter(numSolverIterations=150, enableFileCaching=0) 77 | p.setTimeStep(self._time_step) 78 | p.setGravity(0, 0, 0) 79 | 80 | self.env = self._env_robot() 81 | self.max_dist = self.dist_to_target() 82 | 83 | self._env_step_counter = 0 84 | self.env.enable_force_torque_sensor() 85 | p.stepSimulation() 86 | 87 | self.correlated_noise() 88 | 89 | self._observation = self.get_extended_observation() 90 | self.add_observation_noise() 91 | 92 | return np.array(self._observation) 93 | 94 | def __del__(self): 95 
| p.disconnect() 96 | 97 | def seed(self, seed=None): 98 | self.np_random, seed = seeding.np_random(seed) 99 | return [seed] 100 | 101 | def get_member_pose(self): 102 | return self.env.get_member_pose() 103 | 104 | def get_target_pose(self): 105 | return self.env.get_target_pose() 106 | 107 | def get_force_torque(self): 108 | return self.env.get_force_torque() 109 | 110 | def step2(self, delta): 111 | reward, done, num_success = self.reward() 112 | 113 | if not done: 114 | if self.action_dim > 3: 115 | self.env.apply_action_pose(delta) 116 | else: 117 | self.env.apply_action_position(delta) 118 | else: 119 | if self.action_dim > 3: 120 | self.env.apply_action_pose([0.0] * 6) 121 | else: 122 | self.env.apply_action_position([0.0] * 3) 123 | 124 | p.stepSimulation() 125 | self._env_step_counter += 1 126 | 127 | if self._renders: 128 | time.sleep(self._time_step) 129 | 130 | self._observation = self.get_extended_observation() 131 | self.add_observation_noise() 132 | 133 | return np.array(self._observation), reward, done, {"num_success": num_success} 134 | 135 | def correlated_noise(self): 136 | # force torque sensor bias, sampled at the start of each episode 137 | if self._ft_noise: 138 | self._ft_bias = self.add_gaussian_noise(0.0, self._ft_bias_level, [0] * len(self._ft_bias_level)) 139 | 140 | # add some physical noise here 141 | if self._physical_noise: 142 | self.add_all_friction_noise(self._friction_noise_level) 143 | 144 | def uncorrelated_pose_noise(self, pos, orn): 145 | if self._pose_noise: 146 | pos = self.add_gaussian_noise(0.0, self._pos_noise_level, pos) 147 | orn_euler = p.getEulerFromQuaternion(orn) 148 | orn_euler = self.add_gaussian_noise(0.0, self._orn_noise_level, orn_euler) 149 | orn = p.getQuaternionFromEuler(orn_euler) 150 | 151 | return pos, orn 152 | 153 | def uncorrelated_position_noise(self, pos): 154 | if self._pose_noise: 155 | pos = self.add_gaussian_noise(0.0, self._pos_noise_level, pos) 156 | 157 | return pos 158 | 159 | def add_ft_noise(self, force_torque): 160 | if self._ft_noise: 161 | force_torque = self.add_gaussian_noise(0.0, self._ft_noise_level, force_torque) 162 | force_torque = np.add(force_torque, self._ft_bias).tolist() 163 | 164 | return force_torque 165 | 166 | def add_observation_noise(self): 167 | if not self._ft_obs_only: 168 | if self.action_dim > 3: 169 | self._observation[0:3], self._observation[3:7] = self.uncorrelated_pose_noise(self._observation[0:3], 170 | self._observation[3:7]) 171 | self._observation[7:13] = self.add_ft_noise(self._observation[7:13]) 172 | else: 173 | self._observation[0:3] = self.uncorrelated_position_noise(self._observation[0:3]) 174 | self._observation[3:9] = self.add_ft_noise(self._observation[3:9]) 175 | else: 176 | self._observation[0:6] = self.add_ft_noise(self._observation[0:6]) 177 | 178 | def add_all_friction_noise(self, noise_level): 179 | self.add_body_friction_noise(self.env.target_uid, self.env.link_target, noise_level) 180 | self.add_body_friction_noise(self.env.uid, self.env.link_member, noise_level) 181 | 182 | @staticmethod 183 | def add_body_friction_noise(uid, link, noise_level): 184 | dynamics = p.getDynamicsInfo(uid, link) 185 | noise_range = np.fabs(dynamics[1]) * noise_level 186 | friction_noise = np.random.normal(0, noise_range) 187 | p.changeDynamics(uid, link, lateralFriction=dynamics[1] + friction_noise) 188 | 189 | @staticmethod 190 | def add_gaussian_noise(mean, std, vec): 191 | noise = np.random.normal(mean, std, np.shape(vec)) 192 | return np.add(vec, noise).tolist() 193 | 
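Taken together, Task defines the Gym observation/action spaces, reward and stepping logic, TaskSim supplies the PyBullet reset, simulation stepping and domain-randomization noise, and the RobotSim* classes provide robot-specific pose, force/torque and action application. The sketch below shows one way these pieces could be wired together and stepped with random actions; it is not part of the repository, it assumes pybullet and gym are installed and the repository root is on the Python path, and every numeric value is an illustrative placeholder rather than a tuned setting from the project's configuration.

from envs.robot_sim_panda import RobotSimPanda
from envs.task_sim import TaskSim

# Illustrative configuration only -- the project's actual settings are
# supplied through its own launcher/configuration files.
env = TaskSim(env_robot=RobotSimPanda,
              self_collision_enabled=False,
              renders=False,                 # p.DIRECT, no GUI
              ft_noise=False,
              pose_noise=False,
              action_noise=False,
              physical_noise=False,
              time_step=1.0 / 240.0,
              max_steps=500,
              step_limit=True,
              action_dim=6,                  # x, y, z, rx, ry, rz commands in [-1, 1]
              max_vel=0.01,
              max_rad=0.01,
              ft_obs_only=False,
              limit_ft=False,
              max_ft=[100.0] * 6,
              max_position_range=[1.0] * 3,
              dist_threshold=0.002)

obs = env.reset()                            # loads the URDFs, returns the first observation
for _ in range(100):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        break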
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/insertion_box.urdf:
--------------------------------------------------------------------------------
[URDF XML content not preserved in this export]
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/panda_hand.urdf:
--------------------------------------------------------------------------------
[URDF XML content not preserved in this export]
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/panda_peg.urdf:
--------------------------------------------------------------------------------
[URDF XML content not preserved in this export]
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/finger.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/finger.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/hand.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/hand.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/insertion_box.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/insertion_box.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/link0.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link0.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/link1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link1.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/link2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link2.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/link3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link3.stl -------------------------------------------------------------------------------- /envs/urdf/panda_peg_in_hole/stl/collision/link4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link4.stl -------------------------------------------------------------------------------- /envs/urdf/panda_peg_in_hole/stl/collision/link5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link5.stl -------------------------------------------------------------------------------- /envs/urdf/panda_peg_in_hole/stl/collision/link6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link6.stl -------------------------------------------------------------------------------- /envs/urdf/panda_peg_in_hole/stl/collision/link7.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link7.stl -------------------------------------------------------------------------------- /envs/urdf/panda_peg_in_hole/stl/collision/peg.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/peg.stl -------------------------------------------------------------------------------- /envs/urdf/panda_peg_in_hole/stl/visual/insertion_box.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/visual/insertion_box.stl -------------------------------------------------------------------------------- /envs/urdf/panda_peg_in_hole/stl/visual/peg.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/visual/peg.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_1.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_2.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_2.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_3.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_visual.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_visual.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_1.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_2.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_3.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_visual.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_visual.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_1.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_2.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_3.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_3.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_visual.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_visual.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_1.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_2.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_3.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_visual.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_visual.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_1.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_2.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_3.stl -------------------------------------------------------------------------------- 
/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_4.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_5.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_6.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_7.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_7.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_8.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_8.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_gripper_visual.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_visual.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_collision_1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_collision_1.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_collision_2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_collision_2.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_collision_3.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_collision_3.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_visual.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_visual.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_sensor_collision_1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_sensor_collision_1.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_sensor_collision_2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_sensor_collision_2.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_sensor_collision_3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_sensor_collision_3.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/stl/tool_sensor_visual.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_sensor_visual.stl -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/task_lap_90deg.urdf: -------------------------------------------------------------------------------- [URDF XML markup not preserved in this text export; see the file in the repository] -------------------------------------------------------------------------------- /envs/urdf/robotless_lap_joint/tool.urdf: -------------------------------------------------------------------------------- [URDF XML markup not preserved in this text export; see the file in the repository]
-------------------------------------------------------------------------------- /envs_launcher.py: -------------------------------------------------------------------------------- 1 | from envs.robot_sim_robotless import RobotSimRobotless 2 | from envs.robot_sim_panda import RobotSimPanda 3 | from envs.robot_real_example import RobotRealExample 4 | 5 | t = 'sim' # sim or real? 6 | 7 | if t == 'sim': 8 | from envs.task_sim import TaskSim 9 | def env_creator(env_config): 10 | environment = TaskSim(env_robot=RobotSimRobotless, # choose the sim robot class 11 | self_collision_enabled=True, # collision setting for pybullet 12 | renders=True, # normally for running sim and rolling out in sim, this is set to True; for training, False. 13 | ft_noise=False, # domain randomization on force/torque observation 14 | pose_noise=False, # domain randomization on pose observation 15 | action_noise=False, # domain randomization on actions 16 | physical_noise=False, # domain randomization on physical parameters 17 | time_step=1/250, # sets the control frequency of the robot 18 | max_steps=4000, # max number of steps in each episode 19 | step_limit=True, # limit the length of an episode by max_steps? 20 | action_dim=6, # dimension of action space 21 | max_vel=0.01, # max linear velocity (m/s) along each axis 22 | max_rad=0.01, # max rotational velocity (rad/s) around each axis 23 | ft_obs_only=False, # only use force/torque as observation? 24 | limit_ft=False, # limit force/torque based on max_ft? 25 | max_ft=[1000, 1000, 2500, 100, 100, 100], # max force (N) and torque (Nm) 26 | max_position_range=[2]*3, # max observation space for positions (m) 27 | dist_threshold=0.005) # an episode is considered successful when distance is within the threshold. 28 | 29 | return environment 30 | 31 | 32 | if t == 'real': 33 | from envs.task_real import TaskReal 34 | def env_creator(env_config): 35 | environment = TaskReal(env_robot=RobotRealExample, # choose the real robot class 36 | time_step=1/250, 37 | max_steps=4000, 38 | step_limit=True, 39 | action_dim=6, 40 | max_vel=0.01, 41 | max_rad=0.01, 42 | ft_obs_only=False, 43 | limit_ft=False, 44 | max_ft=[667.233, 667.233, 2001.69, 67.7908, 67.7908, 67.7908], # ATI-Delta FT sensor limits 45 | max_position_range=[2] * 3, 46 | dist_threshold=0.005) 47 | 48 | return environment 49 | 50 | 51 | -------------------------------------------------------------------------------- /hyper_parameters/assembly_apex_ddpg.yaml: -------------------------------------------------------------------------------- 1 | assembly-apex-ddpg: 2 | 3 | env: ROBOTIC_ASSEMBLY 4 | run: APEX_DDPG 5 | 6 | checkpoint_freq: 1 # the frequency at which to store checkpoints 7 | checkpoint_at_end: True # store the last checkpoint?
8 | 9 | local_dir: "~/workspace/ray_results" # directory to store checkpoints 10 | 11 | stop: 12 | successful_rate: 0.9 13 | # training_iteration: 500 # stop training at a predefined number of iterations 14 | config: 15 | use_huber: True 16 | clip_rewards: False 17 | num_workers: 5 # > 1 and < 1/2 of total number of logical cpu cores 18 | n_step: 3 19 | 20 | target_network_update_freq: 50000 21 | buffer_size: 2000000 # this is the total buffer size that will be divided by the number of replay buffers 22 | prioritized_replay_alpha: 0.5 23 | sample_batch_size: 50 24 | train_batch_size: 512 25 | min_iter_time_s: 10 26 | 27 | actor_hiddens: [256, 256] 28 | critic_hiddens: [256, 256] 29 | 30 | parameter_noise: False 31 | batch_mode: "truncate_episodes" # "complete_episodes" or "truncate_episodes" 32 | 33 | # For APEX-DDPG, tau == 1 has the best performance. 34 | # The algorithm uses target_network_update_freq to update. 35 | tau: 1.0 36 | 37 | observation_filter: "MeanStdFilter" 38 | 39 | optimizer: 40 | num_replay_buffer_shards: 6 # the number of replay buffers used 41 | human_demonstration: True # use human demonstration in training? 42 | human_data_dir: "human_demo_data/lap" # only activated when the above param is True 43 | -------------------------------------------------------------------------------- /rollout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import argparse 8 | import collections 9 | import json 10 | import os 11 | import pickle 12 | 13 | import gym 14 | import ray 15 | from ray.rllib.agents.registry import get_agent_class 16 | from ray.rllib.env import MultiAgentEnv 17 | from ray.rllib.env.base_env import _DUMMY_AGENT_ID 18 | from ray.rllib.evaluation.episode import _flatten_action 19 | from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID 20 | from ray.tune.util import merge_dicts 21 | 22 | from ray.tune.registry import register_env 23 | import envs_launcher as el 24 | 25 | EXAMPLE_USAGE = """ 26 | Example Usage via RLlib CLI: 27 | rllib rollout /tmp/ray/checkpoint_dir/checkpoint-0 --run DQN 28 | --env CartPole-v0 --steps 1000000 --out rollouts.pkl 29 | 30 | Example Usage via executable: 31 | ./rollout.py /tmp/ray/checkpoint_dir/checkpoint-0 --run DQN 32 | --env CartPole-v0 --steps 1000000 --out rollouts.pkl 33 | """ 34 | 35 | # Note: if you use any custom models or envs, register them here first, e.g.: 36 | # 37 | # ModelCatalog.register_custom_model("pa_model", ParametricActionsModel) 38 | # register_env("pa_cartpole", lambda _: ParametricActionCartpole(10)) 39 | 40 | 41 | def create_parser(parser_creator=None): 42 | parser_creator = parser_creator or argparse.ArgumentParser 43 | parser = parser_creator( 44 | formatter_class=argparse.RawDescriptionHelpFormatter, 45 | description="Roll out a reinforcement learning agent " 46 | "given a checkpoint.", 47 | epilog=EXAMPLE_USAGE) 48 | 49 | parser.add_argument( 50 | "checkpoint", type=str, help="Checkpoint from which to roll out.") 51 | required_named = parser.add_argument_group("required named arguments") 52 | # required_named.add_argument( 53 | # "--run", 54 | # type=str, 55 | # required=True, 56 | # help="The algorithm or model to train. This may refer to the name " 57 | # "of a built-on algorithm (e.g. 
RLLib's DQN or PPO), or a " 58 | # "user-defined trainable function or class registered in the " 59 | # "tune registry.") 60 | required_named.add_argument( 61 | "--env", type=str, help="The gym environment to use.") 62 | parser.add_argument( 63 | "--no-render", 64 | default=False, 65 | action="store_const", 66 | const=True, 67 | help="Surpress rendering of the environment.") 68 | parser.add_argument( 69 | "--steps", default=10000, help="Number of steps to roll out.") 70 | parser.add_argument( 71 | "--episodes", 72 | default=1, 73 | type=int, 74 | help="Number of episodes to roll out.") 75 | parser.add_argument("--out", default=None, help="Output filename.") 76 | parser.add_argument( 77 | "--config", 78 | default="{}", 79 | type=json.loads, 80 | help="Algorithm-specific configuration (e.g. env, hyperparams). " 81 | "Surpresses loading of configuration from checkpoint.") 82 | return parser 83 | 84 | 85 | def run(args, parser): 86 | config = {} 87 | # Load configuration from file 88 | config_dir = os.path.dirname(args.checkpoint) 89 | config_path = os.path.join(config_dir, "params.pkl") 90 | if not os.path.exists(config_path): 91 | config_path = os.path.join(config_dir, "../params.pkl") 92 | if not os.path.exists(config_path): 93 | if not args.config: 94 | raise ValueError( 95 | "Could not find params.pkl in either the checkpoint dir or " 96 | "its parent directory.") 97 | else: 98 | with open(config_path, "rb") as f: 99 | config = pickle.load(f) 100 | if "num_workers" in config: 101 | config["num_workers"] = min(2, config["num_workers"]) 102 | config = merge_dicts(config, args.config) 103 | if not args.env: 104 | if not config.get("env"): 105 | parser.error("the following arguments are required: --env") 106 | args.env = config.get("env") 107 | 108 | # remove unnecessary parameters 109 | if "num_workers" in config: 110 | del config["num_workers"] 111 | if "human_data_dir" in config["optimizer"]: 112 | del config["optimizer"]["human_data_dir"] 113 | if "human_demonstration" in config["optimizer"]: 114 | del config["optimizer"]["human_demonstration"] 115 | if "multiple_human_data" in config["optimizer"]: 116 | del config["optimizer"]["multiple_human_data"] 117 | if "num_replay_buffer_shards" in config["optimizer"]: 118 | del config["optimizer"]["num_replay_buffer_shards"] 119 | if "demonstration_zone_percentage" in config["optimizer"]: 120 | del config["optimizer"]["demonstration_zone_percentage"] 121 | if "dynamic_experience_replay" in config["optimizer"]: 122 | del config["optimizer"]["dynamic_experience_replay"] 123 | if "robot_demo_path" in config["optimizer"]: 124 | del config["optimizer"]["robot_demo_path"] 125 | 126 | ray.init() 127 | 128 | # cls = get_agent_class(args.run) 129 | # agent = cls(env=args.env, config=config) 130 | 131 | cls = get_agent_class("DDPG") 132 | agent = cls(env="ROBOTIC_ASSEMBLY", config=config) 133 | 134 | agent.restore(args.checkpoint) 135 | num_steps = int(args.steps) 136 | num_episodes = int(args.episodes) 137 | rollout(agent, args.env, num_steps, num_episodes, args.out) 138 | 139 | 140 | class DefaultMapping(collections.defaultdict): 141 | """default_factory now takes as an argument the missing key.""" 142 | 143 | def __missing__(self, key): 144 | self[key] = value = self.default_factory(key) 145 | return value 146 | 147 | 148 | def default_policy_agent_mapping(unused_agent_id): 149 | return DEFAULT_POLICY_ID 150 | 151 | 152 | def rollout(agent, env_name, num_steps, num_episodes, out=None): 153 | policy_agent_mapping = default_policy_agent_mapping 154 | 155 | 
if hasattr(agent, "workers"): 156 | env = agent.workers.local_worker().env 157 | multiagent = isinstance(env, MultiAgentEnv) 158 | if agent.workers.local_worker().multiagent: 159 | policy_agent_mapping = agent.config["multiagent"][ 160 | "policy_mapping_fn"] 161 | 162 | policy_map = agent.workers.local_worker().policy_map 163 | state_init = {p: m.get_initial_state() for p, m in policy_map.items()} 164 | use_lstm = {p: len(s) > 0 for p, s in state_init.items()} 165 | action_init = { 166 | p: _flatten_action(m.action_space.sample()) 167 | for p, m in policy_map.items() 168 | } 169 | else: 170 | env = gym.make(env_name) 171 | multiagent = False 172 | use_lstm = {DEFAULT_POLICY_ID: False} 173 | 174 | if out is not None: 175 | rollouts = [] 176 | steps = 0 177 | episodes = 0 178 | while steps < (num_steps or steps + 1) and (episodes < num_episodes): 179 | mapping_cache = {} # in case policy_agent_mapping is stochastic 180 | if out is not None: 181 | rollout = [] 182 | obs = env.reset() 183 | agent_states = DefaultMapping( 184 | lambda agent_id: state_init[mapping_cache[agent_id]]) 185 | prev_actions = DefaultMapping( 186 | lambda agent_id: action_init[mapping_cache[agent_id]]) 187 | prev_rewards = collections.defaultdict(lambda: 0.) 188 | done = False 189 | reward_total = 0.0 190 | while not done and steps < (num_steps or steps + 1): 191 | multi_obs = obs if multiagent else {_DUMMY_AGENT_ID: obs} 192 | action_dict = {} 193 | for agent_id, a_obs in multi_obs.items(): 194 | if a_obs is not None: 195 | policy_id = mapping_cache.setdefault( 196 | agent_id, policy_agent_mapping(agent_id)) 197 | p_use_lstm = use_lstm[policy_id] 198 | if p_use_lstm: 199 | a_action, p_state, _ = agent.compute_action( 200 | a_obs, 201 | state=agent_states[agent_id], 202 | prev_action=prev_actions[agent_id], 203 | prev_reward=prev_rewards[agent_id], 204 | policy_id=policy_id) 205 | agent_states[agent_id] = p_state 206 | else: 207 | a_action = agent.compute_action( 208 | a_obs, 209 | prev_action=prev_actions[agent_id], 210 | prev_reward=prev_rewards[agent_id], 211 | policy_id=policy_id) 212 | a_action = _flatten_action(a_action) # tuple actions 213 | action_dict[agent_id] = a_action 214 | prev_actions[agent_id] = a_action 215 | action = action_dict 216 | 217 | action = action if multiagent else action[_DUMMY_AGENT_ID] 218 | next_obs, reward, done, _ = env.step(action) 219 | if multiagent: 220 | for agent_id, r in reward.items(): 221 | prev_rewards[agent_id] = r 222 | else: 223 | prev_rewards[_DUMMY_AGENT_ID] = reward 224 | 225 | if multiagent: 226 | done = done["__all__"] 227 | reward_total += sum(reward.values()) 228 | else: 229 | reward_total += reward 230 | 231 | if out is not None: 232 | rollout.append([obs, action, next_obs, reward, done]) 233 | steps += 1 234 | obs = next_obs 235 | if out is not None: 236 | rollouts.append(rollout) 237 | print("Episode reward", reward_total) 238 | episodes += 1 239 | 240 | if out is not None: 241 | pickle.dump(rollouts, open(out, "wb")) 242 | 243 | 244 | if __name__ == "__main__": 245 | register_env("ROBOTIC_ASSEMBLY", el.env_creator) 246 | parser = create_parser() 247 | args = parser.parse_args() 248 | run(args, parser) 249 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test simulated envs or Collect human demonstration data 3 | """ 4 | 5 | import argparse 6 | import collections 7 | import pickle 8 | import numpy as np 9 | 10 | import 
envs_launcher 11 | import devices 12 | import utilities 13 | 14 | 15 | def getargs(): 16 | parser = argparse.ArgumentParser( 17 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 18 | parser.add_argument( 19 | '--input-type', 20 | type=str, 21 | default='pbg', 22 | help='Input device type; must be equal to any of: ' + 23 | ', '.join(devices.REGISTRY.keys())) 24 | parser.add_argument( 25 | '--input-scaling', 26 | type=tuple, 27 | default=(10, 25), 28 | help='Scaling applied to device inputs; (pos, orn).') 29 | parser.add_argument( 30 | '--action-space', 31 | type=int, 32 | default=6, 33 | help='Degrees of freedom; must be equal to either 3 or 6.') 34 | parser.add_argument( 35 | '--save-demo-data', 36 | type=bool, 37 | default=False, 38 | help='Save demonstration data.') 39 | parser.add_argument( 40 | '--demo-data-path', 41 | type=str, 42 | default='human_demo_data/default', 43 | help='Location to save demonstration data') 44 | args = parser.parse_args() 45 | assert args.input_type in devices.REGISTRY, \ 46 | 'arg `input-type` must be equal to any of: ' + ', '.join(devices.REGISTRY.keys()) 47 | assert args.action_space == 3 or args.action_space == 6, \ 48 | 'arg `action-space` must be equal to either 3 or 6.' 49 | return args 50 | 51 | 52 | def main(): 53 | args = getargs() 54 | 55 | environment = envs_launcher.env_creator(args) 56 | obs = environment.reset() 57 | memory = collections.deque() 58 | 59 | device_cls = devices.REGISTRY[args.input_type] 60 | device = device_cls(*args.input_scaling) 61 | device.start() 62 | 63 | done = False 64 | while not done: 65 | device.update() 66 | action = device.pose[:args.action_space] 67 | 68 | new_obs, reward, done, info = environment.step(action) 69 | 70 | if args.save_demo_data and np.count_nonzero(action) > 0: # we don't want to save all-zero actions 71 | memory.append((obs, action, reward, new_obs, done)) 72 | obs = new_obs 73 | else: 74 | device.disconnect() 75 | 76 | if args.save_demo_data: 77 | # save all the transitions 78 | file_name = args.demo_data_path 79 | out_file = open(file_name, 'wb') 80 | pickle.dump(memory, out_file) 81 | out_file.close() 82 | utilities.prGreen('Transition saved') 83 | utilities.prGreen('Steps: {}'.format(len(memory))) 84 | 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import argparse 8 | import yaml 9 | 10 | import ray 11 | from ray.tests.cluster_utils import Cluster 12 | from ray.tune.config_parser import make_parser 13 | from ray.tune.result import DEFAULT_RESULTS_DIR 14 | from ray.tune.resources import resources_to_json 15 | from ray.tune.tune import _make_scheduler, run_experiments 16 | 17 | EXAMPLE_USAGE = """ 18 | Training example via RLlib CLI: 19 | rllib train --run DQN --env CartPole-v0 20 | 21 | Grid search example via RLlib CLI: 22 | rllib train -f tuned_examples/cartpole-grid-search-example.yaml 23 | 24 | Grid search example via executable: 25 | ./train.py -f tuned_examples/cartpole-grid-search-example.yaml 26 | 27 | Note that -f overrides all other trial-specific command-line options. 
28 | """ 29 | 30 | 31 | # additional libraries for dynamic experience replay 32 | from ray.tune.registry import register_env, register_trainable 33 | from ray.rllib.agents.registry import get_agent_class 34 | import os 35 | import random 36 | import envs_launcher 37 | import utilities as util 38 | import shutil 39 | from collections import deque 40 | import pickle 41 | 42 | #======================================== 43 | # Callback functions 44 | # for (1) custom metric -> success rate, 45 | # (2) for save successful robot demos 46 | #======================================== 47 | def on_episode_start(info): 48 | episode = info["episode"] 49 | episode.user_data["success"] = 0 50 | 51 | def on_episode_step(info): 52 | pass 53 | 54 | def on_episode_end(info): 55 | episode = info["episode"] 56 | if len(episode.last_info_for().values()) > 0: 57 | episode.user_data["success"] = list(episode.last_info_for().values())[0] 58 | episode.custom_metrics["successful_rate"] = episode.user_data["success"] 59 | 60 | def on_sample_end(info): 61 | pass 62 | 63 | def on_postprocess_traj(info): 64 | pass 65 | # if list(info["episode"].last_info_for().values())[0] > 0: 66 | # save_episode(info["post_batch"]) 67 | 68 | def on_train_result(info): 69 | if "successful_rate_mean" in info["result"]["custom_metrics"]: 70 | info["result"]["successful_rate"] = info["result"]["custom_metrics"]["successful_rate_mean"] 71 | 72 | def save_episode(samples): 73 | memory = deque() 74 | for row in samples.rows(): 75 | obs = row["obs"] 76 | action = row["actions"] 77 | reward = row["rewards"] 78 | new_obs = row["new_obs"] 79 | done = row["dones"] 80 | memory.append((obs, action, reward, new_obs, done)) 81 | # save transitions 82 | file_name = dir_path + str(random.random()) 83 | out_file = open(file_name, 'wb') 84 | pickle.dump(memory, out_file) 85 | out_file.close() 86 | util.prGreen("A successful transition is saved, length {}".format(len(memory))) 87 | 88 | def get_task_path(yaml_file): 89 | with open(yaml_file) as f: 90 | experiments = yaml.safe_load(f) 91 | experiment_name = next(iter(experiments)) 92 | dir_path = experiments[experiment_name]["local_dir"] 93 | dir_path = os.path.expanduser(dir_path) 94 | dir_path = os.path.join(dir_path, experiment_name) + "/robot_demos/" 95 | if not os.path.exists(dir_path): 96 | os.makedirs(dir_path) 97 | else: 98 | shutil.rmtree(dir_path) 99 | os.makedirs(dir_path) 100 | return dir_path 101 | #====================================== 102 | # End of Callback functions 103 | #====================================== 104 | 105 | 106 | def create_parser(parser_creator=None): 107 | parser = make_parser( 108 | parser_creator=parser_creator, 109 | formatter_class=argparse.RawDescriptionHelpFormatter, 110 | description="Train a reinforcement learning agent.", 111 | epilog=EXAMPLE_USAGE) 112 | 113 | # See also the base parser definition in ray/tune/config_parser.py 114 | parser.add_argument( 115 | "--ray-address", 116 | default=None, 117 | type=str, 118 | help="Connect to an existing Ray cluster at this address instead " 119 | "of starting a new one.") 120 | parser.add_argument( 121 | "--ray-num-cpus", 122 | default=None, 123 | type=int, 124 | help="--num-cpus to use if starting a new cluster.") 125 | parser.add_argument( 126 | "--ray-num-gpus", 127 | default=None, 128 | type=int, 129 | help="--num-gpus to use if starting a new cluster.") 130 | parser.add_argument( 131 | "--ray-num-nodes", 132 | default=None, 133 | type=int, 134 | help="Emulate multiple cluster nodes for debugging.") 135 | 
parser.add_argument( 136 | "--ray-redis-max-memory", 137 | default=None, 138 | type=int, 139 | help="--redis-max-memory to use if starting a new cluster.") 140 | parser.add_argument( 141 | "--ray-memory", 142 | default=None, 143 | type=int, 144 | help="--memory to use if starting a new cluster.") 145 | parser.add_argument( 146 | "--ray-object-store-memory", 147 | default=None, 148 | type=int, 149 | help="--object-store-memory to use if starting a new cluster.") 150 | parser.add_argument( 151 | "--experiment-name", 152 | default="default", 153 | type=str, 154 | help="Name of the subdirectory under `local_dir` to put results in.") 155 | parser.add_argument( 156 | "--local-dir", 157 | default=DEFAULT_RESULTS_DIR, 158 | type=str, 159 | help="Local dir to save training results to. Defaults to '{}'.".format( 160 | DEFAULT_RESULTS_DIR)) 161 | parser.add_argument( 162 | "--upload-dir", 163 | default="", 164 | type=str, 165 | help="Optional URI to sync training results to (e.g. s3://bucket).") 166 | parser.add_argument( 167 | "--resume", 168 | action="store_true", 169 | help="Whether to attempt to resume previous Tune experiments.") 170 | parser.add_argument( 171 | "--eager", 172 | action="store_true", 173 | help="Whether to attempt to enable TF eager execution.") 174 | parser.add_argument( 175 | "--trace", 176 | action="store_true", 177 | help="Whether to attempt to enable tracing for eager mode.") 178 | parser.add_argument( 179 | "--env", default=None, type=str, help="The gym environment to use.") 180 | parser.add_argument( 181 | "--queue-trials", 182 | action="store_true", 183 | help=( 184 | "Whether to queue trials when the cluster does not currently have " 185 | "enough resources to launch one. This should be set to True when " 186 | "running on an autoscaling cluster to enable automatic scale-up.")) 187 | parser.add_argument( 188 | "-f", 189 | "--config-file", 190 | default=None, 191 | type=str, 192 | help="If specified, use config options from this file. Note that this " 193 | "overrides any trial-specific options set via flags above.") 194 | return parser 195 | 196 | def run(args, parser): 197 | if args.config_file: 198 | with open(args.config_file) as f: 199 | experiments = yaml.safe_load(f) 200 | 201 | # add callbacks for self-defined metric 202 | # and save successful transitions from RL agents 203 | experiment_name = next(iter(experiments)) 204 | experiments[experiment_name]["config"]["optimizer"]["robot_demo_path"] = dir_path 205 | experiments[experiment_name]["config"]["callbacks"] = { 206 | "on_episode_start": on_episode_start, 207 | "on_episode_step": on_episode_step, 208 | "on_episode_end": on_episode_end, 209 | "on_sample_end": on_sample_end, 210 | "on_train_result": on_train_result, 211 | "on_postprocess_traj": on_postprocess_traj 212 | } 213 | else: 214 | # Note: keep this in sync with tune/config_parser.py 215 | experiments = { 216 | args.experiment_name: { # i.e. 
log to ~/ray_results/default 217 | "run": args.run, 218 | "checkpoint_freq": args.checkpoint_freq, 219 | "keep_checkpoints_num": args.keep_checkpoints_num, 220 | "checkpoint_score_attr": args.checkpoint_score_attr, 221 | "local_dir": args.local_dir, 222 | "resources_per_trial": ( 223 | args.resources_per_trial and 224 | resources_to_json(args.resources_per_trial)), 225 | "stop": args.stop, 226 | "config": dict(args.config, env=args.env), 227 | "restore": args.restore, 228 | "num_samples": args.num_samples, 229 | "upload_dir": args.upload_dir, 230 | } 231 | } 232 | 233 | for exp in experiments.values(): 234 | if not exp.get("run"): 235 | parser.error("the following arguments are required: --run") 236 | if not exp.get("env") and not exp.get("config", {}).get("env"): 237 | parser.error("the following arguments are required: --env") 238 | if args.eager: 239 | exp["config"]["eager"] = True 240 | if args.trace: 241 | if not exp["config"].get("eager"): 242 | raise ValueError("Must enable --eager to enable tracing.") 243 | exp["config"]["eager_tracing"] = True 244 | 245 | if args.ray_num_nodes: 246 | cluster = Cluster() 247 | for _ in range(args.ray_num_nodes): 248 | cluster.add_node( 249 | num_cpus=args.ray_num_cpus or 1, 250 | num_gpus=args.ray_num_gpus or 0, 251 | object_store_memory=args.ray_object_store_memory, 252 | memory=args.ray_memory, 253 | redis_max_memory=args.ray_redis_max_memory) 254 | ray.init(address=cluster.address) #, log_to_driver=False) 255 | else: 256 | ray.init( 257 | address=args.ray_address, 258 | object_store_memory=args.ray_object_store_memory, 259 | memory=args.ray_memory, 260 | redis_max_memory=args.ray_redis_max_memory, 261 | num_cpus=args.ray_num_cpus, 262 | num_gpus=args.ray_num_gpus) 263 | # log_to_driver=False) # disable the loggings 264 | # https://github.com/ray-project/ray/issues/5048 265 | 266 | run_experiments( 267 | experiments, 268 | scheduler=_make_scheduler(args), 269 | queue_trials=args.queue_trials, 270 | resume=args.resume) 271 | 272 | if __name__ == "__main__": 273 | parser = create_parser() 274 | args = parser.parse_args() 275 | 276 | random.seed(12345) 277 | 278 | # register customized envs 279 | register_env("ROBOTIC_ASSEMBLY", envs_launcher.env_creator) 280 | 281 | # register customized algorithms 282 | # register_trainable("APEX_DDPG_DEMO", get_agent_class("contrib/APEX_DDPG_DEMO")) 283 | 284 | # get the path for saving robot demos 285 | dir_path = get_task_path(args.config_file) 286 | 287 | run(args, parser) 288 | -------------------------------------------------------------------------------- /utilities.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import numpy 3 | import os 4 | import inspect 5 | import transforms3d 6 | import pybullet as p 7 | 8 | # print in color for better readability 9 | def prGreen(skk): 10 | print("\033[92m {}\033[00m" .format(skk)) 11 | 12 | 13 | def prRed(skk): 14 | print("\033[91m {}\033[00m" .format(skk)) 15 | 16 | 17 | def display_frame_axis(body_uid, link_index, line_length=0.05): 18 | # Red: X axis, Green: Y axis, Blue: Z axis 19 | 20 | p.addUserDebugLine([0, 0, 0], [line_length, 0, 0], [1, 0, 0], 21 | parentObjectUniqueId=body_uid, parentLinkIndex=link_index) 22 | p.addUserDebugLine([0, 0, 0], [0, line_length, 0], [0, 1, 0], 23 | parentObjectUniqueId=body_uid, parentLinkIndex=link_index) 24 | p.addUserDebugLine([0, 0, 0], [0, 0, line_length], [0, 0, 1], 25 | parentObjectUniqueId=body_uid, parentLinkIndex=link_index) 26 | 27 | 28 | def write_csv(data, 
csv_file, overwrite): 29 | if os.path.isfile(csv_file) & overwrite: 30 | os.remove(csv_file) 31 | with open(csv_file, 'a') as outfile: 32 | writer = csv.writer(outfile) 33 | writer.writerow(data) 34 | 35 | 36 | def format_urdf_filepath(name): 37 | dot_urdf = '.urdf' 38 | if dot_urdf not in name: 39 | name += dot_urdf 40 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 41 | # print("current_dir=" + currentdir) 42 | os.sys.path.insert(0, currentdir) 43 | 44 | return '{}/{}'.format(currentdir, name) 45 | 46 | 47 | def qinverse(q): 48 | return transforms3d.quaternions.qinverse(q[3], q[0], q[1], q[2]) 49 | 50 | 51 | def xyzw_by_euler(euler, axes): 52 | q = transforms3d.euler.euler2quat(euler[0], euler[1], euler[2], axes) # s for static = extrinsic 53 | return wxyz_to_xyzw(q) 54 | 55 | 56 | def quat_to_euler(xyzw, axes): # axes specifies the type of Euler wanted 57 | return transforms3d.euler.quat2euler(xyzw_to_wxyz(xyzw), axes) 58 | 59 | 60 | def xyzw_to_wxyz(xyzw): 61 | wxyz = [xyzw[3], xyzw[0], xyzw[1], xyzw[2]] 62 | return wxyz 63 | 64 | 65 | def wxyz_to_xyzw(wxyz): 66 | xyzw = [wxyz[1], wxyz[2], wxyz[3], wxyz[0]] 67 | return xyzw 68 | 69 | # Real robot only 70 | def mat33_by_abc(abc): 71 | """ 72 | Get matrix from Euler Angles 73 | :param abc: 74 | :return: 75 | """ 76 | a, b, c = abc 77 | return transforms3d.euler.euler2mat(a, b, c, axes='rzyx') # r for rotating = intrinsic 78 | 79 | 80 | def mat33_by_mat44(mat): 81 | """ 82 | Extract a 3x3 matrix from a 4x4 matrix. 83 | :param mat: 84 | :return: 85 | """ 86 | m = numpy.eye(3, 3) 87 | for i in range(3): 88 | for j in range(3): 89 | m[i][j] = mat[i][j] 90 | return m 91 | 92 | 93 | def mat33_by_quat(xyzw): 94 | """ 95 | Convert quaternion to matrix. 96 | :param xyzw: list, quaternion [x, y, z, w] 97 | :return: 3x3 matrix 98 | """ 99 | wxyz = [xyzw[3], xyzw[0], xyzw[1], xyzw[2]] 100 | return transforms3d.quaternions.quat2mat(wxyz) 101 | 102 | 103 | def mat33_to_quat(mat): 104 | """ 105 | Convert matrix to quaternion. 106 | :param mat: 3x3 matrix 107 | :return: list, quaternion [x, y, z, w] 108 | """ 109 | wxyz = transforms3d.quaternions.mat2quat(mat) 110 | return [wxyz[1], wxyz[2], wxyz[3], wxyz[0]] 111 | 112 | 113 | # Real robot only 114 | def mat33_to_abc(mat): 115 | """ 116 | Get Euler Angles from matrix. 117 | :param mat: 3x3 matrix 118 | :return: list, Euler Angles [z, y', x"] 119 | """ 120 | return list(transforms3d.euler.mat2euler(mat, axes='rzyx')) 121 | 122 | 123 | def mat44_by_pos_mat33(pos, mat): 124 | """ 125 | Construct a matrix using a position and matrix. 126 | :param pos: list, position [x, y, z] 127 | :param mat: 3x3 matrix. 128 | :return: 4x4 matrix 129 | """ 130 | m = numpy.eye(4, 4) 131 | for i in range(3): 132 | for j in range(3): 133 | m[i][j] = mat[i][j] 134 | m[i][3] = pos[i] 135 | return m 136 | 137 | 138 | def mat44_by_pos_quat(pos, quat): 139 | """ 140 | Construct a matrix using a position and orientation 141 | :param pos: list, position [x, y, z] 142 | :param quat: list, quaternion [x, y, z, w] 143 | :return: 4x4 matrix 144 | """ 145 | quat_mat = mat33_by_quat(quat) 146 | return mat44_by_pos_mat33(pos, quat_mat) 147 | 148 | 149 | def mat44_by_pos_abc(pos, abc): 150 | """ 151 | Construct a matrix using position and orientation. 
152 | :param pos: list, position [x, y, z] 153 | :param abc: list, Euler Angles [z, y', x"] 154 | :return: 4x4 matrix 155 | """ 156 | abc_mat = mat33_by_abc(abc) 157 | return mat44_by_pos_mat33(pos, abc_mat) 158 | 159 | 160 | def mat44_to_pos_abc(mat): 161 | """ 162 | Construct position and orientation by matrix. 163 | :param mat: 4x4 matrix. 164 | :return: list, position [x, y, z], list, Euler Angles [z, y', x"] 165 | """ 166 | return mat44_to_pos(mat), mat33_to_abc(mat) 167 | 168 | 169 | def mat44_to_pos_quat(mat): 170 | """ 171 | Construct a position and orientation using a matrix. 172 | :param mat: 4x4 matrix 173 | :return: list, position [x, y, z], list, quaternion [x, y, z, w] 174 | """ 175 | pos = [] 176 | quat_mat = numpy.eye(3, 3) 177 | for i in range(3): 178 | for j in range(3): 179 | quat_mat[i][j] = mat[i][j] 180 | pos.append(mat[i][3]) 181 | quat = mat33_to_quat(quat_mat) 182 | return pos, quat 183 | 184 | 185 | def mat44_to_pos(mat): 186 | """ 187 | Get position from matrix. 188 | :param mat: 4x4 matrix 189 | :return: list, position 190 | """ 191 | return [mat[i][3] for i in range(3)] 192 | 193 | 194 | def get_relative_xform(mat_from, mat_to): 195 | """ 196 | Get the relative transform between two matrices or, in other words, using 197 | the transformation from one matrix to another matrix. 198 | :param mat_from: 4x4 matrix. 199 | :param mat_to: 4x4 matrix 200 | :return: 4x4 matrix. 201 | """ 202 | return numpy.matmul(mat_to, numpy.linalg.inv(mat_from)) 203 | 204 | 205 | def transform_mat(xform, mat): 206 | """ 207 | Transform a matrix by another matrix. 208 | :param mat: 4x4 matrix 209 | :param xform: 4x4 matrix 210 | :return: 211 | """ 212 | return numpy.matmul(xform, mat) 213 | 214 | 215 | def transform_mat_from_to(mat, mat_from, mat_to): 216 | """ 217 | Transform a matrix using the relative transform between two matrices, or, 218 | in other words, using the transformation from one matrix to another matrix. 219 | :param mat: 220 | :param mat_from: 221 | :param mat_to: 222 | :return: 223 | """ 224 | xform = get_relative_xform(mat_from, mat_to) 225 | return transform_mat(xform, mat) 226 | 227 | 228 | def get_f1_to_f2_xform(pose_from, pose_to): 229 | # util.prGreen('base pose: {}'.format(self.pose_from)) 230 | # util.prGreen('member pose: {}'.format(self.pose_to)) 231 | from_f1_to_f2 = get_relative_xform(mat44_by_pos_quat(pose_from[0], pose_from[1]), 232 | mat44_by_pos_quat(pose_to[0], pose_to[1])) 233 | return from_f1_to_f2 234 | --------------------------------------------------------------------------------
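
The 4x4-matrix helpers at the end of `utilities.py` compose through plain `numpy` matrix products: `get_relative_xform(mat_from, mat_to)` returns the transform that maps one pose onto the other, and `transform_mat` applies it. The snippet below is an illustrative usage sketch only, not a file in the repository; it assumes the repository's dependencies (`numpy`, `transforms3d`, `pybullet`) are installed and that the module above is importable as `utilities`.

```python
# Illustrative sketch only -- not part of the repository.
import numpy
from utilities import (mat44_by_pos_quat, mat44_to_pos_quat,
                       get_relative_xform, transform_mat)

# Two poses given as position [x, y, z] and quaternion [x, y, z, w].
pose_a = ([0.0, 0.0, 0.5], [0.0, 0.0, 0.0, 1.0])               # identity orientation
pose_b = ([0.1, 0.0, 0.5], [0.0, 0.0, 0.7071068, 0.7071068])   # ~90 deg about Z

mat_a = mat44_by_pos_quat(*pose_a)   # 4x4 matrix for pose A
mat_b = mat44_by_pos_quat(*pose_b)   # 4x4 matrix for pose B

# xform maps frame A onto frame B, i.e. xform @ mat_a == mat_b.
xform = get_relative_xform(mat_a, mat_b)
recovered = transform_mat(xform, mat_a)

assert numpy.allclose(recovered, mat_b)
print(mat44_to_pos_quat(recovered))  # -> ([0.1, 0.0, 0.5], [~0, ~0, ~0.707, ~0.707])
```

Because `get_relative_xform` right-multiplies by the inverse of `mat_from`, the returned transform is expressed in the world frame, which is why `transform_mat` applies it by left-multiplication.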