├── .gitignore
├── LICENSE.md
├── README.md
├── __init__.py
├── copy_to_rllib.py
├── copy_to_rllib
│   ├── agents
│   │   └── dqn
│   │       └── dqn.py
│   ├── evaluation
│   │   └── metrics.py
│   └── optimizers
│       ├── async_replay_optimizer.py
│       └── replay_buffer.py
├── devices
│   ├── __init__.py
│   ├── action.py
│   ├── callbacks.py
│   ├── device.py
│   ├── devices
│   │   ├── __init__.py
│   │   ├── nothing.py
│   │   ├── pybulletgui.py
│   │   ├── spacemouse.py
│   │   └── xboxcontroller.py
│   ├── pconnect.py
│   ├── pdisconnect.py
│   └── pupdate.py
├── envs
│   ├── __init__.py
│   ├── robot_real_example.py
│   ├── robot_sim_panda.py
│   ├── robot_sim_robotless.py
│   ├── task.py
│   ├── task_real.py
│   ├── task_sim.py
│   └── urdf
│       ├── panda_peg_in_hole
│       │   ├── insertion_box.urdf
│       │   ├── panda_hand.urdf
│       │   ├── panda_peg.urdf
│       │   └── stl
│       │       ├── collision
│       │       │   ├── finger.stl
│       │       │   ├── hand.stl
│       │       │   ├── insertion_box.stl
│       │       │   ├── link0.stl
│       │       │   ├── link1.stl
│       │       │   ├── link2.stl
│       │       │   ├── link3.stl
│       │       │   ├── link4.stl
│       │       │   ├── link5.stl
│       │       │   ├── link6.stl
│       │       │   ├── link7.stl
│       │       │   └── peg.stl
│       │       └── visual
│       │           ├── finger.dae
│       │           ├── hand.dae
│       │           ├── insertion_box.stl
│       │           ├── link0.dae
│       │           ├── link1.dae
│       │           ├── link2.dae
│       │           ├── link3.dae
│       │           ├── link4.dae
│       │           ├── link5.dae
│       │           ├── link6.dae
│       │           ├── link7.dae
│       │           └── peg.stl
│       └── robotless_lap_joint
│           ├── stl
│           │   ├── task_lap_90deg_0mm_1.stl
│           │   ├── task_lap_90deg_0mm_2.stl
│           │   ├── task_lap_90deg_0mm_3.stl
│           │   ├── task_lap_90deg_0mm_visual.stl
│           │   ├── task_lap_90deg_1mm_1.stl
│           │   ├── task_lap_90deg_1mm_2.stl
│           │   ├── task_lap_90deg_1mm_3.stl
│           │   ├── task_lap_90deg_1mm_visual.stl
│           │   ├── task_lap_90deg_2mm_1.stl
│           │   ├── task_lap_90deg_2mm_2.stl
│           │   ├── task_lap_90deg_2mm_3.stl
│           │   ├── task_lap_90deg_2mm_visual.stl
│           │   ├── task_lap_90deg_3mm_1.stl
│           │   ├── task_lap_90deg_3mm_2.stl
│           │   ├── task_lap_90deg_3mm_3.stl
│           │   ├── task_lap_90deg_3mm_visual.stl
│           │   ├── tool_gripper_collision_1.stl
│           │   ├── tool_gripper_collision_2.stl
│           │   ├── tool_gripper_collision_3.stl
│           │   ├── tool_gripper_collision_4.stl
│           │   ├── tool_gripper_collision_5.stl
│           │   ├── tool_gripper_collision_6.stl
│           │   ├── tool_gripper_collision_7.stl
│           │   ├── tool_gripper_collision_8.stl
│           │   ├── tool_gripper_visual.stl
│           │   ├── tool_lap_90deg_0mm_collision_1.stl
│           │   ├── tool_lap_90deg_0mm_collision_2.stl
│           │   ├── tool_lap_90deg_0mm_collision_3.stl
│           │   ├── tool_lap_90deg_0mm_visual.stl
│           │   ├── tool_sensor_collision_1.stl
│           │   ├── tool_sensor_collision_2.stl
│           │   ├── tool_sensor_collision_3.stl
│           │   └── tool_sensor_visual.stl
│           ├── task_lap_90deg.urdf
│           └── tool.urdf
├── envs_launcher.py
├── hyper_parameters
│   └── assembly_apex_ddpg.yaml
├── rollout.py
├── run.py
├── train.py
└── utilities.py
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.pyc
3 | .DS_Store
4 | .idea/
5 | _ignore/
6 | human_demo_data/
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Autodesk Research, Autodesk Inc. Copyright (c) 2020 Gramazio Kohler Research, ETH Zürich
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6 |
7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8 |
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Autodesk Deep Reinforcement Learning for Robotic Assembly
2 |
3 | Here we provide a sim-to-real RL training and testing environment for robotic assembly, as well as a modification to APEX-DDPG that adds the option of recording and using human demonstrations. We include two examples in simulation (PyBullet): a Franka Panda robot performing a peg-in-hole task, and a robot-less end-effector performing a lap-joint task. We also include a template for connecting your real robot when rolling out a successfully learned policy.
4 |
5 | ## Installation
6 |
7 | This repository is tested to be compatible with Ray 0.7.5, so the following instructions assume Ray 0.7.5. Feel free to try later versions of Ray and modify the code accordingly.
8 |
9 | 1. Create a conda environment with Python 3.6 and install Ray 0.7.5 (https://pypi.org/project/ray/): `$ pip install ray==0.7.5`
10 | 2. Install the following dependencies:
11 | * [pybullet](https://pypi.org/project/pybullet/)
12 | * [tensorflow](https://pypi.org/project/tensorflow/)
13 | * [gym](https://pypi.org/project/gym/)
14 | * [opencv-python](https://pypi.org/project/opencv-python/)
15 | * [getch](https://pypi.org/project/getch/)
16 | * [pygame](https://pypi.org/project/pygame/)
17 | * [transforms3d](https://pypi.org/project/transforms3d/)
18 |
19 | ```
20 | $ pip install pybullet==2.2.6 tensorflow==1.10.0 gym opencv-python getch pygame transforms3d
21 | ```
22 | 3. Download the ray source code from https://github.com/ray-project/ray/releases/tag/ray-0.7.5 and keep the rllib folder in your local working directory.
23 | 4. In the ray folder, find python/ray/setup-dev.py and run `$ python setup-dev.py` to link to the local rllib.
24 | 5. Clone this repository inside the rllib folder.
25 | 6. From inside this repository, run `$ python copy_to_rllib.py` to install the patch.
26 |
27 | ## Run
28 |
29 | Configure the parameters:
30 | - Environment parameters in `envs_launcher.py`
31 | - Training hyper-parameters in the `hyper_parameters/*.yaml` files (e.g. `hyper_parameters/assembly_apex_ddpg.yaml`).
32 |
33 | Test and visualize the simulation environment with the default input device:
34 | ```
35 | python run.py
36 | ```
37 |
38 | Provide a demonstration with the Xbox controller:
39 |
40 | ```
41 | python run.py --input-type xbc --save-demo-data=True --demo-data-path=human_demo_data/
42 | ```
43 |
44 | Train a model:
45 |
46 | ```
47 | python train.py -f hyper_parameters/*.yaml
48 | ```
49 |
50 | Roll out a model:
51 |
52 | ```
53 | python rollout.py <path-to-results>/checkpoint-<N>/checkpoint-<N>
54 | ```
55 |
56 | ## Notes
57 |
58 | - All code is written in [Python 3.6](https://www.python.org/).
59 | - All code was tested on macOS, Windows, and Ubuntu.
60 | - For licensing information see [LICENSE](LICENSE.md).
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/__init__.py
--------------------------------------------------------------------------------
/copy_to_rllib.py:
--------------------------------------------------------------------------------
1 | # rllib patch: run this script from the root of this repository (cloned inside the local rllib folder) so the '..' destinations below resolve to rllib/
2 |
3 | from shutil import copyfile
4 | import os
5 |
6 | # show all the stats in terminal, not just one third
7 | dqn_src = "copy_to_rllib/agents/dqn/dqn.py"
8 | dqn_dst = "../agents/dqn/dqn.py"
9 | copyfile(dqn_src, dqn_dst)
10 |
11 | # human demonstrations & dynamic experience replay
12 | async_src = "copy_to_rllib/optimizers/async_replay_optimizer.py"
13 | async_dst = "../optimizers/async_replay_optimizer.py"
14 | copyfile(async_src, async_dst)
15 | buffer_src = "copy_to_rllib/optimizers/replay_buffer.py"
16 | buffer_dst = "../optimizers/replay_buffer.py"
17 | copyfile(buffer_src, buffer_dst)
18 |
19 | # calculate the custom metrics for one transition,
20 | # not the historic transitions designed by Ray
21 | metrics_src = "copy_to_rllib/evaluation/metrics.py"
22 | metrics_dst = "../evaluation/metrics.py"
23 | copyfile(metrics_src, metrics_dst)
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/copy_to_rllib/agents/dqn/dqn.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import logging
6 |
7 | from ray.rllib.agents.trainer import with_common_config
8 | from ray.rllib.agents.trainer_template import build_trainer
9 | from ray.rllib.agents.dqn.dqn_policy import DQNTFPolicy
10 | from ray.rllib.agents.dqn.simple_q_policy import SimpleQPolicy
11 | from ray.rllib.optimizers import SyncReplayOptimizer
12 | from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID
13 | from ray.rllib.utils.schedules import ConstantSchedule, LinearSchedule
14 |
15 | logger = logging.getLogger(__name__)
16 |
17 | # yapf: disable
18 | # __sphinx_doc_begin__
19 | DEFAULT_CONFIG = with_common_config({
20 | # === Model ===
21 | # Number of atoms for representing the distribution of return. When
22 | # this is greater than 1, distributional Q-learning is used.
23 | # the discrete supports are bounded by v_min and v_max
24 | "num_atoms": 1,
25 | "v_min": -10.0,
26 | "v_max": 10.0,
27 | # Whether to use noisy network
28 | "noisy": False,
29 | # control the initial value of noisy nets
30 | "sigma0": 0.5,
31 | # Whether to use dueling dqn
32 | "dueling": True,
33 | # Whether to use double dqn
34 | "double_q": True,
35 | # Postprocess model outputs with these hidden layers to compute the
36 | # state and action values. See also the model config in catalog.py.
37 | "hiddens": [256],
38 | # N-step Q learning
39 | "n_step": 1,
40 |
41 | # === Exploration ===
42 | # Max num timesteps for annealing schedules. Exploration is annealed from
43 | # 1.0 to exploration_fraction over this number of timesteps scaled by
44 | # exploration_fraction
45 | "schedule_max_timesteps": 100000,
46 | # Minimum env steps to optimize for per train call. This value does
47 | # not affect learning, only the length of iterations.
48 | "timesteps_per_iteration": 1000,
49 | # Fraction of entire training period over which the exploration rate is
50 | # annealed
51 | "exploration_fraction": 0.1,
52 | # Final value of random action probability
53 | "exploration_final_eps": 0.02,
54 | # Update the target network every `target_network_update_freq` steps.
55 | "target_network_update_freq": 500,
56 | # Use softmax for sampling actions. Required for off policy estimation.
57 | "soft_q": False,
58 | # Softmax temperature. Q values are divided by this value prior to softmax.
59 | # Softmax approaches argmax as the temperature drops to zero.
60 | "softmax_temp": 1.0,
61 | # If True parameter space noise will be used for exploration
62 | # See https://blog.openai.com/better-exploration-with-parameter-noise/
63 | "parameter_noise": False,
64 | # Extra configuration that disables exploration.
65 | "evaluation_config": {
66 | "exploration_fraction": 0,
67 | "exploration_final_eps": 0,
68 | },
69 |
70 | # === Replay buffer ===
71 | # Size of the replay buffer. Note that if async_updates is set, then
72 | # each worker will have a replay buffer of this size.
73 | "buffer_size": 50000,
74 | # If True prioritized replay buffer will be used.
75 | "prioritized_replay": True,
76 | # Alpha parameter for prioritized replay buffer.
77 | "prioritized_replay_alpha": 0.6,
78 | # Beta parameter for sampling from prioritized replay buffer.
79 | "prioritized_replay_beta": 0.4,
80 | # Fraction of entire training period over which the beta parameter is
81 | # annealed
82 | "beta_annealing_fraction": 0.2,
83 | # Final value of beta
84 | "final_prioritized_replay_beta": 0.4,
85 | # Epsilon to add to the TD errors when updating priorities.
86 | "prioritized_replay_eps": 1e-6,
87 | # Whether to LZ4 compress observations
88 | "compress_observations": True,
89 |
90 | # === Optimization ===
91 | # Learning rate for adam optimizer
92 | "lr": 5e-4,
93 | # Learning rate schedule
94 | "lr_schedule": None,
95 | # Adam epsilon hyper parameter
96 | "adam_epsilon": 1e-8,
97 | # If not None, clip gradients during optimization at this value
98 | "grad_norm_clipping": 40,
99 | # How many steps of the model to sample before learning starts.
100 | "learning_starts": 1000,
101 | # Update the replay buffer with this many samples at once. Note that
102 | # this setting applies per-worker if num_workers > 1.
103 | "sample_batch_size": 4,
104 | # Size of a batched sampled from replay buffer for training. Note that
105 | # if async_updates is set, then each worker returns gradients for a
106 | # batch of this size.
107 | "train_batch_size": 32,
108 |
109 | # === Parallelism ===
110 | # Number of workers for collecting samples with. This only makes sense
111 | # to increase if your environment is particularly slow to sample, or if
112 |     # you're using the Async or Ape-X optimizers.
113 | "num_workers": 0,
114 | # Whether to use a distribution of epsilons across workers for exploration.
115 | "per_worker_exploration": False,
116 | # Whether to compute priorities on workers.
117 | "worker_side_prioritization": False,
118 | # Prevent iterations from going lower than this time span
119 | "min_iter_time_s": 1,
120 | })
121 | # __sphinx_doc_end__
122 | # yapf: enable
123 |
124 |
125 | def make_optimizer(workers, config):
126 | return SyncReplayOptimizer(
127 | workers,
128 | learning_starts=config["learning_starts"],
129 | buffer_size=config["buffer_size"],
130 | prioritized_replay=config["prioritized_replay"],
131 | prioritized_replay_alpha=config["prioritized_replay_alpha"],
132 | prioritized_replay_beta=config["prioritized_replay_beta"],
133 | schedule_max_timesteps=config["schedule_max_timesteps"],
134 | beta_annealing_fraction=config["beta_annealing_fraction"],
135 | final_prioritized_replay_beta=config["final_prioritized_replay_beta"],
136 | prioritized_replay_eps=config["prioritized_replay_eps"],
137 | train_batch_size=config["train_batch_size"],
138 | sample_batch_size=config["sample_batch_size"],
139 | **config["optimizer"])
140 |
141 |
142 | def check_config_and_setup_param_noise(config):
143 | """Update the config based on settings.
144 |
145 | Rewrites sample_batch_size to take into account n_step truncation, and also
146 | adds the necessary callbacks to support parameter space noise exploration.
147 | """
148 |
149 | # Update effective batch size to include n-step
150 | adjusted_batch_size = max(config["sample_batch_size"],
151 | config.get("n_step", 1))
152 | config["sample_batch_size"] = adjusted_batch_size
153 |
154 | if config.get("parameter_noise", False):
155 | if config["batch_mode"] != "complete_episodes":
156 | raise ValueError("Exploration with parameter space noise requires "
157 | "batch_mode to be complete_episodes.")
158 | if config.get("noisy", False):
159 | raise ValueError(
160 | "Exploration with parameter space noise and noisy network "
161 | "cannot be used at the same time.")
162 | if config["callbacks"]["on_episode_start"]:
163 | start_callback = config["callbacks"]["on_episode_start"]
164 | else:
165 | start_callback = None
166 |
167 | def on_episode_start(info):
168 | # as a callback function to sample and pose parameter space
169 | # noise on the parameters of network
170 | policies = info["policy"]
171 | for pi in policies.values():
172 | pi.add_parameter_noise()
173 | if start_callback:
174 | start_callback(info)
175 |
176 | config["callbacks"]["on_episode_start"] = on_episode_start
177 | if config["callbacks"]["on_episode_end"]:
178 | end_callback = config["callbacks"]["on_episode_end"]
179 | else:
180 | end_callback = None
181 |
182 | def on_episode_end(info):
183 | # as a callback function to monitor the distance
184 | # between noisy policy and original policy
185 | policies = info["policy"]
186 | episode = info["episode"]
187 | model = policies[DEFAULT_POLICY_ID].model
188 | if hasattr(model, "pi_distance"):
189 | episode.custom_metrics["policy_distance"] = model.pi_distance
190 | if end_callback:
191 | end_callback(info)
192 |
193 | config["callbacks"]["on_episode_end"] = on_episode_end
194 |
195 |
196 | def get_initial_state(config):
197 | return {
198 | "last_target_update_ts": 0,
199 | "num_target_updates": 0,
200 | }
201 |
202 |
203 | def make_exploration_schedule(config, worker_index):
204 | # Use either a different `eps` per worker, or a linear schedule.
205 | if config["per_worker_exploration"]:
206 | assert config["num_workers"] > 1, \
207 | "This requires multiple workers"
208 | if worker_index >= 0:
209 | # Exploration constants from the Ape-X paper
210 | exponent = (
211 | 1 + worker_index / float(config["num_workers"] - 1) * 7)
212 | return ConstantSchedule(0.4**exponent)
213 | else:
214 | # local ev should have zero exploration so that eval rollouts
215 | # run properly
216 | return ConstantSchedule(0.0)
217 | return LinearSchedule(
218 | schedule_timesteps=int(
219 | config["exploration_fraction"] * config["schedule_max_timesteps"]),
220 | initial_p=1.0,
221 | final_p=config["exploration_final_eps"])
222 |
223 |
224 | def setup_exploration(trainer):
225 | trainer.exploration0 = make_exploration_schedule(trainer.config, -1)
226 | trainer.explorations = [
227 | make_exploration_schedule(trainer.config, i)
228 | for i in range(trainer.config["num_workers"])
229 | ]
230 |
231 |
232 | def update_worker_explorations(trainer):
233 | global_timestep = trainer.optimizer.num_steps_sampled
234 | exp_vals = [trainer.exploration0.value(global_timestep)]
235 | trainer.workers.local_worker().foreach_trainable_policy(
236 | lambda p, _: p.set_epsilon(exp_vals[0]))
237 | for i, e in enumerate(trainer.workers.remote_workers()):
238 | exp_val = trainer.explorations[i].value(global_timestep)
239 | e.foreach_trainable_policy.remote(lambda p, _: p.set_epsilon(exp_val))
240 | exp_vals.append(exp_val)
241 | trainer.train_start_timestep = global_timestep
242 | trainer.cur_exp_vals = exp_vals
243 |
244 |
245 | def add_trainer_metrics(trainer, result):
246 | global_timestep = trainer.optimizer.num_steps_sampled
247 | result.update(
248 | timesteps_this_iter=global_timestep - trainer.train_start_timestep,
249 | info=dict({
250 | "min_exploration": min(trainer.cur_exp_vals),
251 | "max_exploration": max(trainer.cur_exp_vals),
252 | "num_target_updates": trainer.state["num_target_updates"],
253 | }, **trainer.optimizer.stats()))
254 |
255 |
256 | def update_target_if_needed(trainer, fetches):
257 | global_timestep = trainer.optimizer.num_steps_sampled
258 | if global_timestep - trainer.state["last_target_update_ts"] > \
259 | trainer.config["target_network_update_freq"]:
260 | trainer.workers.local_worker().foreach_trainable_policy(
261 | lambda p, _: p.update_target())
262 | trainer.state["last_target_update_ts"] = global_timestep
263 | trainer.state["num_target_updates"] += 1
264 |
265 |
266 | def collect_metrics(trainer):
267 | if trainer.config["per_worker_exploration"]:
268 |         # Patched: collect metrics from all remote workers, not only the third with the lowest eps
269 | result = trainer.collect_metrics(
270 | selected_workers=trainer.workers.remote_workers()[
271 | -len(trainer.workers.remote_workers()) // 1:])
272 | else:
273 | result = trainer.collect_metrics()
274 | return result
275 |
276 |
277 | def disable_exploration(trainer):
278 | trainer.evaluation_workers.local_worker().foreach_policy(
279 | lambda p, _: p.set_epsilon(0))
280 |
281 |
282 | GenericOffPolicyTrainer = build_trainer(
283 | name="GenericOffPolicyAlgorithm",
284 | default_policy=None,
285 | default_config=DEFAULT_CONFIG,
286 | validate_config=check_config_and_setup_param_noise,
287 | get_initial_state=get_initial_state,
288 | make_policy_optimizer=make_optimizer,
289 | before_init=setup_exploration,
290 | before_train_step=update_worker_explorations,
291 | after_optimizer_step=update_target_if_needed,
292 | after_train_result=add_trainer_metrics,
293 | collect_metrics_fn=collect_metrics,
294 | before_evaluate_fn=disable_exploration)
295 |
296 | DQNTrainer = GenericOffPolicyTrainer.with_updates(
297 | name="DQN", default_policy=DQNTFPolicy, default_config=DEFAULT_CONFIG)
298 |
299 | SimpleQTrainer = DQNTrainer.with_updates(default_policy=SimpleQPolicy)
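
For orientation, a minimal usage sketch (not taken from the repository) of how the `DEFAULT_CONFIG` above is consumed in RLlib 0.7.5: user overrides are merged into it when a trainer is constructed. This assumes `copy_to_rllib.py` has already copied the patched file into the local `rllib`, and uses `CartPole-v0` purely as a stand-in environment:

```
import ray
from ray.rllib.agents.dqn import DQNTrainer  # resolves to the patched dqn.py once copied into rllib

ray.init()
# Overrides are merged into DEFAULT_CONFIG; unspecified keys keep the defaults above.
trainer = DQNTrainer(env="CartPole-v0", config={"num_workers": 0, "learning_starts": 500})
result = trainer.train()  # one training iteration
print(result["episode_reward_mean"])
```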
300 |
--------------------------------------------------------------------------------
/copy_to_rllib/evaluation/metrics.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import logging
6 | import numpy as np
7 | import collections
8 |
9 | import ray
10 | from ray.rllib.evaluation.rollout_metrics import RolloutMetrics
11 | from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID
12 | from ray.rllib.offline.off_policy_estimator import OffPolicyEstimate
13 | from ray.rllib.policy.policy import LEARNER_STATS_KEY
14 | from ray.rllib.utils.annotations import DeveloperAPI
15 | from ray.rllib.utils.memory import ray_get_and_free
16 |
17 | logger = logging.getLogger(__name__)
18 |
19 |
20 | @DeveloperAPI
21 | def get_learner_stats(grad_info):
22 | """Return optimization stats reported from the policy.
23 |
24 | Example:
25 | >>> grad_info = evaluator.learn_on_batch(samples)
26 |         >>> print(get_learner_stats(grad_info))
27 | {"vf_loss": ..., "policy_loss": ...}
28 | """
29 |
30 | if LEARNER_STATS_KEY in grad_info:
31 | return grad_info[LEARNER_STATS_KEY]
32 |
33 | multiagent_stats = {}
34 | for k, v in grad_info.items():
35 | if type(v) is dict:
36 | if LEARNER_STATS_KEY in v:
37 | multiagent_stats[k] = v[LEARNER_STATS_KEY]
38 |
39 | return multiagent_stats
40 |
41 |
42 | @DeveloperAPI
43 | def collect_metrics(local_worker=None,
44 | remote_workers=[],
45 | to_be_collected=[],
46 | timeout_seconds=180):
47 | """Gathers episode metrics from RolloutWorker instances."""
48 |
49 | episodes, to_be_collected = collect_episodes(
50 | local_worker,
51 | remote_workers,
52 | to_be_collected,
53 | timeout_seconds=timeout_seconds)
54 | metrics = summarize_episodes(episodes, episodes)
55 | return metrics
56 |
57 |
58 | @DeveloperAPI
59 | def collect_episodes(local_worker=None,
60 | remote_workers=[],
61 | to_be_collected=[],
62 | timeout_seconds=180):
63 | """Gathers new episodes metrics tuples from the given evaluators."""
64 |
65 | if remote_workers:
66 | pending = [
67 | a.apply.remote(lambda ev: ev.get_metrics()) for a in remote_workers
68 | ] + to_be_collected
69 | collected, to_be_collected = ray.wait(
70 | pending, num_returns=len(pending), timeout=timeout_seconds * 1.0)
71 | if pending and len(collected) == 0:
72 | logger.warning(
73 | "WARNING: collected no metrics in {} seconds".format(
74 | timeout_seconds))
75 | metric_lists = ray_get_and_free(collected)
76 | else:
77 | metric_lists = []
78 |
79 | if local_worker:
80 | metric_lists.append(local_worker.get_metrics())
81 | episodes = []
82 | for metrics in metric_lists:
83 | episodes.extend(metrics)
84 | return episodes, to_be_collected
85 |
86 |
87 | @DeveloperAPI
88 | def summarize_episodes(episodes, new_episodes):
89 | """Summarizes a set of episode metrics tuples.
90 |
91 | Arguments:
92 | episodes: smoothed set of episodes including historical ones
93 | new_episodes: just the new episodes in this iteration
94 | """
95 |
96 | episodes, estimates = _partition(episodes)
97 | new_episodes, _ = _partition(new_episodes)
98 |
99 | episode_rewards = []
100 | episode_lengths = []
101 | policy_rewards = collections.defaultdict(list)
102 | custom_metrics = collections.defaultdict(list)
103 | perf_stats = collections.defaultdict(list)
104 | for episode in episodes:
105 | episode_lengths.append(episode.episode_length)
106 | episode_rewards.append(episode.episode_reward)
107 | # for k, v in episode.custom_metrics.items():
108 | # custom_metrics[k].append(v)
109 | for k, v in episode.perf_stats.items():
110 | perf_stats[k].append(v)
111 | for (_, policy_id), reward in episode.agent_rewards.items():
112 | if policy_id != DEFAULT_POLICY_ID:
113 | policy_rewards[policy_id].append(reward)
114 | # only calculate the custom_metrics on the episodes in the current iteration
115 |     # to have an accurate success rate
116 | for episode in new_episodes:
117 | for k, v in episode.custom_metrics.items():
118 | custom_metrics[k].append(v)
119 | if episode_rewards:
120 | min_reward = min(episode_rewards)
121 | max_reward = max(episode_rewards)
122 | else:
123 | min_reward = float("nan")
124 | max_reward = float("nan")
125 | avg_reward = np.mean(episode_rewards)
126 | avg_length = np.mean(episode_lengths)
127 |
128 | policy_reward_min = {}
129 | policy_reward_mean = {}
130 | policy_reward_max = {}
131 | for policy_id, rewards in policy_rewards.copy().items():
132 | policy_reward_min[policy_id] = np.min(rewards)
133 | policy_reward_mean[policy_id] = np.mean(rewards)
134 | policy_reward_max[policy_id] = np.max(rewards)
135 |
136 | for k, v_list in custom_metrics.copy().items():
137 | custom_metrics[k + "_mean"] = np.mean(v_list)
138 | filt = [v for v in v_list if not np.isnan(v)]
139 | if filt:
140 | custom_metrics[k + "_min"] = np.min(filt)
141 | custom_metrics[k + "_max"] = np.max(filt)
142 | else:
143 | custom_metrics[k + "_min"] = float("nan")
144 | custom_metrics[k + "_max"] = float("nan")
145 | del custom_metrics[k]
146 |
147 | for k, v_list in perf_stats.copy().items():
148 | perf_stats[k] = np.mean(v_list)
149 |
150 | estimators = collections.defaultdict(lambda: collections.defaultdict(list))
151 | for e in estimates:
152 | acc = estimators[e.estimator_name]
153 | for k, v in e.metrics.items():
154 | acc[k].append(v)
155 | for name, metrics in estimators.items():
156 | for k, v_list in metrics.items():
157 | metrics[k] = np.mean(v_list)
158 | estimators[name] = dict(metrics)
159 |
160 | return dict(
161 | episode_reward_max=max_reward,
162 | episode_reward_min=min_reward,
163 | episode_reward_mean=avg_reward,
164 | episode_len_mean=avg_length,
165 | episodes_this_iter=len(new_episodes),
166 | policy_reward_min=policy_reward_min,
167 | policy_reward_max=policy_reward_max,
168 | policy_reward_mean=policy_reward_mean,
169 | custom_metrics=dict(custom_metrics),
170 | sampler_perf=dict(perf_stats),
171 | off_policy_estimator=dict(estimators))
172 |
173 |
174 | def _partition(episodes):
175 | """Divides metrics data into true rollouts vs off-policy estimates."""
176 |
177 | rollouts, estimates = [], []
178 | for e in episodes:
179 | if isinstance(e, RolloutMetrics):
180 | rollouts.append(e)
181 | elif isinstance(e, OffPolicyEstimate):
182 | estimates.append(e)
183 | else:
184 | raise ValueError("Unknown metric type: {}".format(e))
185 | return rollouts, estimates
186 |
--------------------------------------------------------------------------------
/copy_to_rllib/optimizers/async_replay_optimizer.py:
--------------------------------------------------------------------------------
1 | """Implements Distributed Prioritized Experience Replay.
2 |
3 | https://arxiv.org/abs/1803.00933"""
4 |
5 | from __future__ import absolute_import
6 | from __future__ import division
7 | from __future__ import print_function
8 |
9 | import collections
10 | import os
11 | import random
12 | import time
13 | import threading
14 |
15 | import numpy as np
16 | from six.moves import queue
17 |
18 | import ray
19 | from ray.rllib.evaluation.metrics import get_learner_stats
20 | from ray.rllib.policy.sample_batch import SampleBatch, DEFAULT_POLICY_ID, \
21 | MultiAgentBatch
22 | from ray.rllib.optimizers.policy_optimizer import PolicyOptimizer
23 | from ray.rllib.optimizers.replay_buffer import PrioritizedReplayBuffer
24 | from ray.rllib.utils.annotations import override
25 | from ray.rllib.utils.actors import TaskPool, create_colocated
26 | from ray.rllib.utils.memory import ray_get_and_free
27 | from ray.rllib.utils.timer import TimerStat
28 | from ray.rllib.utils.window_stat import WindowStat
29 |
30 | SAMPLE_QUEUE_DEPTH = 2
31 | REPLAY_QUEUE_DEPTH = 4
32 | LEARNER_QUEUE_MAX_SIZE = 16
33 |
34 |
35 | # added for dynamic experience replay
36 | import os, os.path, inspect
37 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
38 | parentdir = os.path.dirname(currentdir)
39 | import pickle
40 | from collections import deque
41 |
42 | # print texts in green in terminal
43 | def prGreen(skk): print("\033[92m {}\033[00m" .format(skk))
44 |
45 | # filter out invisible files and sub-folders
46 | def file_filter(names):
47 | filtered_files = []
48 | for name in names:
49 |         if not name.startswith("."):
50 | filtered_files.append(name)
51 | return filtered_files
52 | # added for dynamic experience replay
53 |
54 |
55 | class AsyncReplayOptimizer(PolicyOptimizer):
56 | """Main event loop of the Ape-X optimizer (async sampling with replay).
57 |
58 | This class coordinates the data transfers between the learner thread,
59 | remote workers (Ape-X actors), and replay buffer actors.
60 |
61 | This has two modes of operation:
62 | - normal replay: replays independent samples.
63 | - batch replay: simplified mode where entire sample batches are
64 | replayed. This supports RNNs, but not prioritization.
65 |
66 | This optimizer requires that rollout workers return an additional
67 | "td_error" array in the info return of compute_gradients(). This error
68 | term will be used for sample prioritization."""
69 |
70 | def __init__(self,
71 | workers,
72 | learning_starts=1000,
73 | buffer_size=10000,
74 | prioritized_replay=True,
75 | prioritized_replay_alpha=0.6,
76 | prioritized_replay_beta=0.4,
77 | prioritized_replay_eps=1e-6,
78 | train_batch_size=512,
79 | sample_batch_size=50,
80 | num_replay_buffer_shards=1,
81 | max_weight_sync_delay=400,
82 | debug=False,
83 | batch_replay=False,
84 | # added for dynamic experience replay
85 | human_demonstration=False,
86 | multiple_human_data=-1,
87 | human_data_dir=None,
88 | dynamic_experience_replay=False,
89 | demonstration_zone_percentage=0,
90 | robot_demo_path=None):
91 | """Initialize an async replay optimizer.
92 |
93 | Arguments:
94 | workers (WorkerSet): all workers
95 | learning_starts (int): wait until this many steps have been sampled
96 | before starting optimization.
97 | buffer_size (int): max size of the replay buffer
98 | prioritized_replay (bool): whether to enable prioritized replay
99 | prioritized_replay_alpha (float): replay alpha hyperparameter
100 | prioritized_replay_beta (float): replay beta hyperparameter
101 | prioritized_replay_eps (float): replay eps hyperparameter
102 | train_batch_size (int): size of batches to learn on
103 | sample_batch_size (int): size of batches to sample from workers
104 | num_replay_buffer_shards (int): number of actors to use to store
105 | replay samples
106 | max_weight_sync_delay (int): update the weights of a rollout worker
107 | after collecting this number of timesteps from it
108 | debug (bool): return extra debug stats
109 | batch_replay (bool): replay entire sequential batches of
110 | experiences instead of sampling steps individually
111 | """
112 | PolicyOptimizer.__init__(self, workers)
113 |
114 | self.debug = debug
115 | self.batch_replay = batch_replay
116 | self.replay_starts = learning_starts
117 | self.prioritized_replay_beta = prioritized_replay_beta
118 | self.prioritized_replay_eps = prioritized_replay_eps
119 | self.max_weight_sync_delay = max_weight_sync_delay
120 |
121 | self.learner = LearnerThread(self.workers.local_worker())
122 | self.learner.start()
123 |
124 | # added for dynamic experience replay
125 | self.dynamic_experience_replay = dynamic_experience_replay
126 |
127 | if self.batch_replay:
128 | replay_cls = BatchReplayActor
129 | else:
130 | replay_cls = ReplayActor
131 | self.replay_actors = create_colocated(replay_cls, [
132 | num_replay_buffer_shards,
133 | learning_starts,
134 | buffer_size,
135 | train_batch_size,
136 | prioritized_replay_alpha,
137 | prioritized_replay_beta,
138 | prioritized_replay_eps,
139 | # added for dynamic experience replay
140 | human_demonstration,
141 | multiple_human_data,
142 | human_data_dir,
143 | dynamic_experience_replay,
144 | demonstration_zone_percentage,
145 | robot_demo_path
146 | ], num_replay_buffer_shards)
147 |
148 | # Stats
149 | self.timers = {
150 | k: TimerStat()
151 | for k in [
152 | "put_weights", "get_samples", "sample_processing",
153 | "replay_processing", "update_priorities", "train", "sample"
154 | ]
155 | }
156 | self.num_weight_syncs = 0
157 | self.num_samples_dropped = 0
158 | self.learning_started = False
159 |
160 | # Number of worker steps since the last weight update
161 | self.steps_since_update = {}
162 |
163 |         # Otherwise kick off replay tasks for local gradient updates
164 | self.replay_tasks = TaskPool()
165 | for ra in self.replay_actors:
166 | for _ in range(REPLAY_QUEUE_DEPTH):
167 | self.replay_tasks.add(ra, ra.replay.remote())
168 |
169 | # Kick off async background sampling
170 | self.sample_tasks = TaskPool()
171 | if self.workers.remote_workers():
172 | self._set_workers(self.workers.remote_workers())
173 |
174 | @override(PolicyOptimizer)
175 | def step(self):
176 | assert self.learner.is_alive()
177 | assert len(self.workers.remote_workers()) > 0
178 | start = time.time()
179 | sample_timesteps, train_timesteps = self._step()
180 | time_delta = time.time() - start
181 | self.timers["sample"].push(time_delta)
182 | self.timers["sample"].push_units_processed(sample_timesteps)
183 | if train_timesteps > 0:
184 | self.learning_started = True
185 | if self.learning_started:
186 | self.timers["train"].push(time_delta)
187 | self.timers["train"].push_units_processed(train_timesteps)
188 | self.num_steps_sampled += sample_timesteps
189 | self.num_steps_trained += train_timesteps
190 |
191 | @override(PolicyOptimizer)
192 | def stop(self):
193 | for r in self.replay_actors:
194 | r.__ray_terminate__.remote()
195 | self.learner.stopped = True
196 |
197 | @override(PolicyOptimizer)
198 | def reset(self, remote_workers):
199 | self.workers.reset(remote_workers)
200 | self.sample_tasks.reset_workers(remote_workers)
201 |
202 | @override(PolicyOptimizer)
203 | def stats(self):
204 | replay_stats = ray_get_and_free(self.replay_actors[0].stats.remote(
205 | self.debug))
206 | timing = {
207 | "{}_time_ms".format(k): round(1000 * self.timers[k].mean, 3)
208 | for k in self.timers
209 | }
210 | timing["learner_grad_time_ms"] = round(
211 | 1000 * self.learner.grad_timer.mean, 3)
212 | timing["learner_dequeue_time_ms"] = round(
213 | 1000 * self.learner.queue_timer.mean, 3)
214 | stats = {
215 | "sample_throughput": round(self.timers["sample"].mean_throughput,
216 | 3),
217 | "train_throughput": round(self.timers["train"].mean_throughput, 3),
218 | "num_weight_syncs": self.num_weight_syncs,
219 | "num_samples_dropped": self.num_samples_dropped,
220 | "learner_queue": self.learner.learner_queue_size.stats(),
221 | "replay_shard_0": replay_stats,
222 | }
223 | debug_stats = {
224 | "timing_breakdown": timing,
225 | "pending_sample_tasks": self.sample_tasks.count,
226 | "pending_replay_tasks": self.replay_tasks.count,
227 | }
228 | if self.debug:
229 | stats.update(debug_stats)
230 | if self.learner.stats:
231 | stats["learner"] = self.learner.stats
232 | return dict(PolicyOptimizer.stats(self), **stats)
233 |
234 | # For https://github.com/ray-project/ray/issues/2541 only
235 | def _set_workers(self, remote_workers):
236 | self.workers.reset(remote_workers)
237 | weights = self.workers.local_worker().get_weights()
238 | for ev in self.workers.remote_workers():
239 | ev.set_weights.remote(weights)
240 | self.steps_since_update[ev] = 0
241 | for _ in range(SAMPLE_QUEUE_DEPTH):
242 | self.sample_tasks.add(ev, ev.sample_with_count.remote())
243 |
244 | def _step(self):
245 | sample_timesteps, train_timesteps = 0, 0
246 | weights = None
247 |
248 | with self.timers["sample_processing"]:
249 | completed = list(self.sample_tasks.completed())
250 | counts = ray_get_and_free([c[1][1] for c in completed])
251 | for i, (ev, (sample_batch, count)) in enumerate(completed):
252 | sample_timesteps += counts[i]
253 |
254 | # Send the data to the replay buffer
255 | random.choice(
256 | self.replay_actors).add_batch.remote(sample_batch)
257 |
258 | # Update weights if needed
259 | self.steps_since_update[ev] += counts[i]
260 | if self.steps_since_update[ev] >= self.max_weight_sync_delay:
261 | # Note that it's important to pull new weights once
262 | # updated to avoid excessive correlation between actors
263 | if weights is None or self.learner.weights_updated:
264 | self.learner.weights_updated = False
265 | with self.timers["put_weights"]:
266 | weights = ray.put(
267 | self.workers.local_worker().get_weights())
268 | ev.set_weights.remote(weights)
269 | self.num_weight_syncs += 1
270 | self.steps_since_update[ev] = 0
271 |
272 | # Kick off another sample request
273 | self.sample_tasks.add(ev, ev.sample_with_count.remote())
274 |
275 |             # added for dynamic experience replay: with a small probability, refresh the demonstration zone with a newly saved robot demo
276 | if self.dynamic_experience_replay and random.random() < 0.0001:
277 | random.choice(self.replay_actors).update_demos.remote()
278 |
279 | with self.timers["replay_processing"]:
280 | for ra, replay in self.replay_tasks.completed():
281 | self.replay_tasks.add(ra, ra.replay.remote())
282 | if self.learner.inqueue.full():
283 | self.num_samples_dropped += 1
284 | else:
285 | with self.timers["get_samples"]:
286 | samples = ray_get_and_free(replay)
287 | # Defensive copy against plasma crashes, see #2610 #3452
288 | self.learner.inqueue.put((ra, samples and samples.copy()))
289 |
290 | with self.timers["update_priorities"]:
291 | while not self.learner.outqueue.empty():
292 | ra, prio_dict, count = self.learner.outqueue.get()
293 | ra.update_priorities.remote(prio_dict)
294 | train_timesteps += count
295 |
296 | return sample_timesteps, train_timesteps
297 |
298 |
299 | @ray.remote(num_cpus=0)
300 | class ReplayActor(object):
301 | """A replay buffer shard.
302 |
303 | Ray actors are single-threaded, so for scalability multiple replay actors
304 | may be created to increase parallelism."""
305 |
306 | def __init__(self, num_shards, learning_starts, buffer_size,
307 | train_batch_size, prioritized_replay_alpha,
308 | prioritized_replay_beta, prioritized_replay_eps,
309 | # added for dynamic experience replay
310 | human_demonstration, multiple_human_data, human_data_dir,
311 | dynamic_experience_replay, demonstration_zone_percentage, robot_demo_path):
312 | self.replay_starts = learning_starts // num_shards
313 | self.buffer_size = buffer_size // num_shards
314 | self.train_batch_size = train_batch_size
315 | self.prioritized_replay_beta = prioritized_replay_beta
316 | self.prioritized_replay_eps = prioritized_replay_eps
317 |
318 | # Metrics
319 | self.add_batch_timer = TimerStat()
320 | self.replay_timer = TimerStat()
321 | self.update_priorities_timer = TimerStat()
322 | self.num_added = 0
323 |
324 |
325 | # added for dynamic experience replay
326 | self.prioritized_replay_alpha = prioritized_replay_alpha
327 | self.human_demonstration = human_demonstration
328 | self.multiple_human_data = multiple_human_data
329 | self.human_data_dir = human_data_dir
330 | self.dynamic_experience_replay = dynamic_experience_replay
331 | self.demonstration_zone_percentage = demonstration_zone_percentage
332 | self.robot_demo_path = robot_demo_path
333 | self.load_human_demo()
334 |
335 |
336 | # added for dynamic experience replay
337 | def load_human_demo(self):
338 | if self.human_demonstration:
339 | assert self.human_data_dir is not None, "human demonstration path is None"
340 | assert (self.multiple_human_data == -1 or
341 | self.multiple_human_data == 0 or
342 | self.multiple_human_data == 1), "multiple_human_data has to be either -1, 0, or 1"
343 |
344 | # one shot in all buffers
345 | if self.multiple_human_data == -1:
346 | in_file = open(self.human_data_dir, 'rb')
347 | human_data = pickle.load(in_file)
348 | in_file.close()
349 | prGreen("Human demo data loaded from: {}".format(self.human_data_dir))
350 |
351 | # all the shots in one buffer
352 | elif self.multiple_human_data == 0:
353 | names = os.listdir(self.human_data_dir)
354 | filtered_files = file_filter(names)
355 | prGreen("Insert {} demonstrations to the buffer".format(len(filtered_files)))
356 |
357 | human_data = deque()
358 | human_demo_dir = None
359 | for i in range(len(filtered_files)):
360 | human_demo_dir = self.human_data_dir + str(i)
361 | in_file = open(human_demo_dir, 'rb')
362 | human_data.extend(pickle.load(in_file))
363 | in_file.close()
364 |
365 | # each shot occupies one buffer
366 | elif self.multiple_human_data == 1:
367 | names = os.listdir(self.human_data_dir)
368 | human_data_dir = self.human_data_dir
369 | filtered_files = file_filter(names)
370 | prGreen("Sampling from {} demonstrations".format(len(filtered_files)))
371 | selected_transition = random.randint(0,len(filtered_files)-1)
372 | if selected_transition is not None:
373 | human_data_dir += str(selected_transition)
374 | in_file = open(human_data_dir, 'rb')
375 | human_data = pickle.load(in_file)
376 | in_file.close()
377 | prGreen("Human demo data loaded from: {}".format(human_data_dir))
378 |
379 | def new_buffer():
380 | return PrioritizedReplayBuffer(
381 | self.buffer_size, alpha=self.prioritized_replay_alpha,
382 | permanent_data_length=len(human_data), human_data=human_data,
383 | dynamic_experience_replay=self.dynamic_experience_replay,
384 | demonstration_zone_percentage=self.demonstration_zone_percentage)
385 | self.replay_buffers = collections.defaultdict(new_buffer)
386 | prGreen("{} human transitions are loaded to the replay buffer.".format(len(human_data)))
387 |
388 | else:
389 | def new_buffer():
390 | return PrioritizedReplayBuffer(
391 | self.buffer_size, alpha=self.prioritized_replay_alpha,
392 | dynamic_experience_replay=self.dynamic_experience_replay,
393 | demonstration_zone_percentage=self.demonstration_zone_percentage)
394 | self.replay_buffers = collections.defaultdict(new_buffer)
395 |
396 |
397 | # added for dynamic experience replay
398 | def update_demos(self):
399 | # randomly selecting a demo
400 | robot_data_dir = self.robot_demo_path
401 | names = os.listdir(robot_data_dir)
402 | filtered_files = file_filter(names)
403 |
404 | if len(filtered_files) > 0:
405 | selected_transition = filtered_files[random.randint(0,len(filtered_files)-1)]
406 | robot_data_dir += str(selected_transition)
407 | in_file = open(robot_data_dir, 'rb')
408 | demo_data = pickle.load(in_file)
409 | in_file.close()
410 |
411 | for policy_id, replay_buffer in self.replay_buffers.items():
412 | replay_buffer.update_demos(demo_data)
413 | prGreen("Newly sampled demo length: {}".format(len(demo_data)))
414 | prGreen("The current demonstration size: {}".format(replay_buffer._permanent_data_length))
415 |
416 |
417 | def get_host(self):
418 | return os.uname()[1]
419 |
420 | def add_batch(self, batch):
421 | # Handle everything as if multiagent
422 | if isinstance(batch, SampleBatch):
423 | batch = MultiAgentBatch({DEFAULT_POLICY_ID: batch}, batch.count)
424 | with self.add_batch_timer:
425 | for policy_id, s in batch.policy_batches.items():
426 | for row in s.rows():
427 | self.replay_buffers[policy_id].add(
428 | row["obs"], row["actions"], row["rewards"],
429 | row["new_obs"], row["dones"], row["weights"])
430 | self.num_added += batch.count
431 |
432 | def replay(self):
433 | if self.num_added < self.replay_starts:
434 | return None
435 |
436 | with self.replay_timer:
437 | samples = {}
438 | for policy_id, replay_buffer in self.replay_buffers.items():
439 | (obses_t, actions, rewards, obses_tp1, dones, weights,
440 | batch_indexes) = replay_buffer.sample(
441 | self.train_batch_size, beta=self.prioritized_replay_beta)
442 | samples[policy_id] = SampleBatch({
443 | "obs": obses_t,
444 | "actions": actions,
445 | "rewards": rewards,
446 | "new_obs": obses_tp1,
447 | "dones": dones,
448 | "weights": weights,
449 | "batch_indexes": batch_indexes
450 | })
451 | return MultiAgentBatch(samples, self.train_batch_size)
452 |
453 | def update_priorities(self, prio_dict):
454 | with self.update_priorities_timer:
455 | for policy_id, (batch_indexes, td_errors) in prio_dict.items():
456 | new_priorities = (
457 | np.abs(td_errors) + self.prioritized_replay_eps)
458 | self.replay_buffers[policy_id].update_priorities(
459 | batch_indexes, new_priorities)
460 |
461 | def stats(self, debug=False):
462 | stat = {
463 | "add_batch_time_ms": round(1000 * self.add_batch_timer.mean, 3),
464 | "replay_time_ms": round(1000 * self.replay_timer.mean, 3),
465 | "update_priorities_time_ms": round(
466 | 1000 * self.update_priorities_timer.mean, 3),
467 | }
468 | for policy_id, replay_buffer in self.replay_buffers.items():
469 | stat.update({
470 | "policy_{}".format(policy_id): replay_buffer.stats(debug=debug)
471 | })
472 | return stat
473 |
474 |
475 | # note: we set num_cpus=0 to avoid failing to create replay actors when
476 | # resources are fragmented. This isn't ideal.
477 | @ray.remote(num_cpus=0)
478 | class BatchReplayActor(object):
479 | """The batch replay version of the replay actor.
480 |
481 | This allows for RNN models, but ignores prioritization params.
482 | """
483 |
484 | def __init__(self, num_shards, learning_starts, buffer_size,
485 | train_batch_size, prioritized_replay_alpha,
486 | prioritized_replay_beta, prioritized_replay_eps):
487 | self.replay_starts = learning_starts // num_shards
488 | self.buffer_size = buffer_size // num_shards
489 | self.train_batch_size = train_batch_size
490 | self.buffer = []
491 |
492 | # Metrics
493 | self.num_added = 0
494 | self.cur_size = 0
495 |
496 | def get_host(self):
497 | return os.uname()[1]
498 |
499 | def add_batch(self, batch):
500 | # Handle everything as if multiagent
501 | if isinstance(batch, SampleBatch):
502 | batch = MultiAgentBatch({DEFAULT_POLICY_ID: batch}, batch.count)
503 | self.buffer.append(batch)
504 | self.cur_size += batch.count
505 | self.num_added += batch.count
506 | while self.cur_size > self.buffer_size:
507 | self.cur_size -= self.buffer.pop(0).count
508 |
509 | def replay(self):
510 | if self.num_added < self.replay_starts:
511 | return None
512 | return random.choice(self.buffer)
513 |
514 | def update_priorities(self, prio_dict):
515 | pass
516 |
517 | def stats(self, debug=False):
518 | stat = {
519 | "cur_size": self.cur_size,
520 | "num_added": self.num_added,
521 | }
522 | return stat
523 |
524 |
525 | class LearnerThread(threading.Thread):
526 | """Background thread that updates the local model from replay data.
527 |
528 | The learner thread communicates with the main thread through Queues. This
529 | is needed since Ray operations can only be run on the main thread. In
530 | addition, moving heavyweight gradient ops session runs off the main thread
531 | improves overall throughput.
532 | """
533 |
534 | def __init__(self, local_worker):
535 | threading.Thread.__init__(self)
536 | self.learner_queue_size = WindowStat("size", 50)
537 | self.local_worker = local_worker
538 | self.inqueue = queue.Queue(maxsize=LEARNER_QUEUE_MAX_SIZE)
539 | self.outqueue = queue.Queue()
540 | self.queue_timer = TimerStat()
541 | self.grad_timer = TimerStat()
542 | self.daemon = True
543 | self.weights_updated = False
544 | self.stopped = False
545 | self.stats = {}
546 |
547 | def run(self):
548 | while not self.stopped:
549 | self.step()
550 |
551 | def step(self):
552 | with self.queue_timer:
553 | ra, replay = self.inqueue.get()
554 | if replay is not None:
555 | prio_dict = {}
556 | with self.grad_timer:
557 | grad_out = self.local_worker.learn_on_batch(replay)
558 | for pid, info in grad_out.items():
559 | prio_dict[pid] = (
560 | replay.policy_batches[pid].data.get("batch_indexes"),
561 | info.get("td_error"))
562 | self.stats[pid] = get_learner_stats(info)
563 | self.outqueue.put((ra, prio_dict, replay.count))
564 | self.learner_queue_size.push(self.inqueue.qsize())
565 | self.weights_updated = True
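
The demonstration-related constructor arguments added above (`human_demonstration`, `multiple_human_data`, `human_data_dir`, `dynamic_experience_replay`, `demonstration_zone_percentage`, `robot_demo_path`) presumably reach `AsyncReplayOptimizer` through the trainer's `optimizer` config sub-dict, the same mechanism the patched `dqn.py` uses for `SyncReplayOptimizer` via `**config["optimizer"]`. A hypothetical sketch of such an override; the values and the `robot_demo_data/` path are illustrative and not taken from `hyper_parameters/assembly_apex_ddpg.yaml`:

```
# Hypothetical config fragment (values are illustrative; the shipped YAML may differ):
apex_ddpg_overrides = {
    "optimizer": {
        "human_demonstration": True,           # load recorded human demos into each replay shard
        "multiple_human_data": 0,              # -1: one demo file for all buffers, 0: all files into one buffer, 1: one file per buffer
        "human_data_dir": "human_demo_data/",  # where run.py --save-demo-data stores demonstrations
        "dynamic_experience_replay": True,     # periodically refresh the demo zone with robot demos
        "demonstration_zone_percentage": 0.1,  # fraction of the buffer reserved for demonstrations
        "robot_demo_path": "robot_demo_data/", # hypothetical path holding robot-generated demos
    },
}
```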
566 |
--------------------------------------------------------------------------------
/copy_to_rllib/optimizers/replay_buffer.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 | import random
7 | import sys
8 |
9 | from ray.rllib.optimizers.segment_tree import SumSegmentTree, MinSegmentTree
10 | from ray.rllib.utils.annotations import DeveloperAPI
11 | from ray.rllib.utils.compression import unpack_if_needed
12 | from ray.rllib.utils.window_stat import WindowStat
13 |
14 | # added for dynamic experience replay
15 | import math
16 | # print texts in green in terminal
17 | def prGreen(skk): print("\033[92m {}\033[00m" .format(skk))
18 |
19 | @DeveloperAPI
20 | class ReplayBuffer(object):
21 | @DeveloperAPI
22 | def __init__(self, size, permanent_data_length=0):
23 | """Create Prioritized Replay buffer.
24 |
25 | Parameters
26 | ----------
27 | size: int
28 | Max number of transitions to store in the buffer. When the buffer
29 | overflows the old memories are dropped.
30 | """
31 | self._storage = []
32 | self._maxsize = size
33 | self._next_idx = 0
34 | self._hit_count = np.zeros(size)
35 | self._eviction_started = False
36 | self._num_added = 0
37 | self._num_sampled = 0
38 | self._evicted_hit_stats = WindowStat("evicted_hit", 1000)
39 | self._est_size_bytes = 0
40 | self._permanent_data_length = permanent_data_length
41 |
42 | def __len__(self):
43 | return len(self._storage)
44 |
45 | @DeveloperAPI
46 | def add(self, obs_t, action, reward, obs_tp1, done, weight):
47 | data = (obs_t, action, reward, obs_tp1, done)
48 | self._num_added += 1
49 |
50 | if self._next_idx >= len(self._storage):
51 | self._storage.append(data)
52 | self._est_size_bytes += sum(sys.getsizeof(d) for d in data)
53 | else:
54 | self._storage[self._next_idx] = data
55 | if self._next_idx + 1 >= self._maxsize:
56 | self._eviction_started = True
57 |                 # added for dynamic experience replay: wrap back to just past the permanent demo region so demonstrations are never evicted
58 | self._next_idx = self._permanent_data_length
59 | else:
60 | self._next_idx = self._next_idx + 1
61 | if self._eviction_started:
62 | self._evicted_hit_stats.push(self._hit_count[self._next_idx])
63 | self._hit_count[self._next_idx] = 0
64 |
65 | def _encode_sample(self, idxes):
66 | obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], []
67 | for i in idxes:
68 | data = self._storage[i]
69 | obs_t, action, reward, obs_tp1, done = data
70 | obses_t.append(np.array(unpack_if_needed(obs_t), copy=False))
71 | actions.append(np.array(action, copy=False))
72 | rewards.append(reward)
73 | obses_tp1.append(np.array(unpack_if_needed(obs_tp1), copy=False))
74 | dones.append(done)
75 | self._hit_count[i] += 1
76 | return (np.array(obses_t), np.array(actions), np.array(rewards),
77 | np.array(obses_tp1), np.array(dones))
78 |
79 | @DeveloperAPI
80 | def sample_idxes(self, batch_size):
81 | return [
82 | random.randint(0,
83 | len(self._storage) - 1) for _ in range(batch_size)
84 | ]
85 |
86 | @DeveloperAPI
87 | def sample_with_idxes(self, idxes):
88 | self._num_sampled += len(idxes)
89 | return self._encode_sample(idxes)
90 |
91 | @DeveloperAPI
92 | def sample(self, batch_size):
93 | """Sample a batch of experiences.
94 |
95 | Parameters
96 | ----------
97 | batch_size: int
98 | How many transitions to sample.
99 |
100 | Returns
101 | -------
102 | obs_batch: np.array
103 | batch of observations
104 | act_batch: np.array
105 | batch of actions executed given obs_batch
106 | rew_batch: np.array
107 | rewards received as results of executing act_batch
108 | next_obs_batch: np.array
109 | next set of observations seen after executing act_batch
110 | done_mask: np.array
111 | done_mask[i] = 1 if executing act_batch[i] resulted in
112 | the end of an episode and 0 otherwise.
113 | """
114 | idxes = [
115 | random.randint(0,
116 | len(self._storage) - 1) for _ in range(batch_size)
117 | ]
118 | self._num_sampled += batch_size
119 | return self._encode_sample(idxes)
120 |
121 | @DeveloperAPI
122 | def stats(self, debug=False):
123 | data = {
124 | "added_count": self._num_added,
125 | "sampled_count": self._num_sampled,
126 | "est_size_bytes": self._est_size_bytes,
127 | "num_entries": len(self._storage),
128 | }
129 | if debug:
130 | data.update(self._evicted_hit_stats.stats())
131 | return data
132 |
133 |
134 | @DeveloperAPI
135 | class PrioritizedReplayBuffer(ReplayBuffer):
136 | @DeveloperAPI
137 | def __init__(self, size, alpha,
138 | # added for dynamic experience replay
139 | permanent_data_length=0, human_data=None,
140 | dynamic_experience_replay=False,
141 | demonstration_zone_percentage=0):
142 |
143 | """Create Prioritized Replay buffer.
144 |
145 | Parameters
146 | ----------
147 | size: int
148 | Max number of transitions to store in the buffer. When the buffer
149 | overflows the old memories are dropped.
150 | alpha: float
151 | how much prioritization is used
152 | (0 - no prioritization, 1 - full prioritization)
153 |
154 | See Also
155 | --------
156 | ReplayBuffer.__init__
157 | """
158 |
159 | # added for dynamic experience replay
160 | super(PrioritizedReplayBuffer, self).__init__(size, permanent_data_length)
161 |
162 | assert alpha > 0
163 | self._alpha = alpha
164 |
165 | it_capacity = 1
166 | while it_capacity < size:
167 | it_capacity *= 2
168 |
169 | self._it_sum = SumSegmentTree(it_capacity)
170 | self._it_min = MinSegmentTree(it_capacity)
171 | self._max_priority = 1.0
172 | self._prio_change_stats = WindowStat("reprio", 1000)
173 |
174 | # added for dynamic experience replay
175 | self._permanent_data_length = permanent_data_length
176 | self._demo_index = permanent_data_length
177 | if dynamic_experience_replay and demonstration_zone_percentage > 0:
178 | self._max_demo_size = math.ceil(size * demonstration_zone_percentage) # assign the size for demonstration zone
179 | assert self._max_demo_size > 10000, "demonstration zone is too small, please increase buffer size"
180 | prGreen("the maximum demonstration zone size: {}".format(self._max_demo_size))
181 | if self._permanent_data_length > 0:
182 | # insert human demos to the buffer with full prioritization
183 | for transition in human_data:
184 | self.add(transition[0], transition[1], transition[2], transition[3], transition[4], 1)
185 |
186 | @DeveloperAPI
187 | def add(self, obs_t, action, reward, obs_tp1, done, weight):
188 | """See ReplayBuffer.store_effect"""
189 |
190 | idx = self._next_idx
191 | super(PrioritizedReplayBuffer, self).add(obs_t, action, reward,
192 | obs_tp1, done, weight)
193 | if weight is None:
194 | weight = self._max_priority
195 | self._it_sum[idx] = weight**self._alpha
196 | self._it_min[idx] = weight**self._alpha
197 |
198 |     # added for dynamic experience replay: overwrite the demonstration zone with newly sampled robot demos, cycling within the zone
199 | def update_demos(self, demo_data):
200 |
201 | updated_demo_size = self._permanent_data_length + len(demo_data)
202 | if updated_demo_size < self._max_demo_size:
203 | self._permanent_data_length = updated_demo_size
204 | else:
205 | self._permanent_data_length = self._max_demo_size
206 |
207 | for transition in demo_data:
208 | self._storage[self._demo_index] = transition
209 | self._demo_index = (self._demo_index + 1) % self._max_demo_size
210 |
211 | def _sample_proportional(self, batch_size):
212 | res = []
213 | for _ in range(batch_size):
214 | # TODO(szymon): should we ensure no repeats?
215 | mass = random.random() * self._it_sum.sum(0, len(self._storage))
216 | idx = self._it_sum.find_prefixsum_idx(mass)
217 | res.append(idx)
218 | return res
219 |
220 | @DeveloperAPI
221 | def sample_idxes(self, batch_size):
222 | return self._sample_proportional(batch_size)
223 |
224 | @DeveloperAPI
225 | def sample_with_idxes(self, idxes, beta):
226 | assert beta > 0
227 | self._num_sampled += len(idxes)
228 |
229 | weights = []
230 | p_min = self._it_min.min() / self._it_sum.sum()
231 | max_weight = (p_min * len(self._storage))**(-beta)
232 |
233 | for idx in idxes:
234 | p_sample = self._it_sum[idx] / self._it_sum.sum()
235 | weight = (p_sample * len(self._storage))**(-beta)
236 | weights.append(weight / max_weight)
237 | weights = np.array(weights)
238 | encoded_sample = self._encode_sample(idxes)
239 | return tuple(list(encoded_sample) + [weights, idxes])
240 |
241 | @DeveloperAPI
242 | def sample(self, batch_size, beta):
243 | """Sample a batch of experiences.
244 |
245 | compared to ReplayBuffer.sample
246 | it also returns importance weights and idxes
247 | of sampled experiences.
248 |
249 |
250 | Parameters
251 | ----------
252 | batch_size: int
253 | How many transitions to sample.
254 | beta: float
255 | To what degree to use importance weights
256 | (0 - no corrections, 1 - full correction)
257 |
258 | Returns
259 | -------
260 | obs_batch: np.array
261 | batch of observations
262 | act_batch: np.array
263 | batch of actions executed given obs_batch
264 | rew_batch: np.array
265 | rewards received as results of executing act_batch
266 | next_obs_batch: np.array
267 | next set of observations seen after executing act_batch
268 | done_mask: np.array
269 | done_mask[i] = 1 if executing act_batch[i] resulted in
270 | the end of an episode and 0 otherwise.
271 | weights: np.array
272 | Array of shape (batch_size,) and dtype np.float32
273 | denoting importance weight of each sampled transition
274 | idxes: np.array
275 | Array of shape (batch_size,) and dtype np.int32
276 |             indexes in the buffer of the sampled experiences
277 | """
278 | assert beta > 0
279 | self._num_sampled += batch_size
280 |
281 | idxes = self._sample_proportional(batch_size)
282 |
283 | weights = []
284 | p_min = self._it_min.min() / self._it_sum.sum()
285 | max_weight = (p_min * len(self._storage))**(-beta)
286 |
287 | for idx in idxes:
288 | p_sample = self._it_sum[idx] / self._it_sum.sum()
289 | weight = (p_sample * len(self._storage))**(-beta)
290 | weights.append(weight / max_weight)
291 | weights = np.array(weights)
292 | encoded_sample = self._encode_sample(idxes)
293 | return tuple(list(encoded_sample) + [weights, idxes])
294 |
295 | @DeveloperAPI
296 | def update_priorities(self, idxes, priorities):
297 | """Update priorities of sampled transitions.
298 |
299 | sets priority of transition at index idxes[i] in buffer
300 | to priorities[i].
301 |
302 | Parameters
303 | ----------
304 | idxes: [int]
305 | List of idxes of sampled transitions
306 | priorities: [float]
307 | List of updated priorities corresponding to
308 | transitions at the sampled idxes denoted by
309 | variable `idxes`.
310 | """
311 | assert len(idxes) == len(priorities)
312 | for idx, priority in zip(idxes, priorities):
313 | assert priority > 0
314 | assert 0 <= idx < len(self._storage)
315 | delta = priority**self._alpha - self._it_sum[idx]
316 | self._prio_change_stats.push(delta)
317 | self._it_sum[idx] = priority**self._alpha
318 | self._it_min[idx] = priority**self._alpha
319 |
320 | self._max_priority = max(self._max_priority, priority)
321 |
322 | @DeveloperAPI
323 | def stats(self, debug=False):
324 | parent = ReplayBuffer.stats(self, debug)
325 | if debug:
326 | parent.update(self._prio_change_stats.stats())
327 | return parent
328 |
--------------------------------------------------------------------------------
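
A minimal usage sketch for the prioritized buffer above, with placeholder data. It assumes the module's RLlib imports resolve and that the constructor arguments beyond `size` and `alpha` keep defaults that leave the demonstration zone disabled; the TD errors are stand-ins for values a learner would supply.

# Hypothetical sketch; not part of the repository.
buf = PrioritizedReplayBuffer(size=2**15, alpha=0.6)

# Fill with dummy transitions; weight=None falls back to the current max priority.
for t in range(1000):
    buf.add(obs_t=[t], action=0, reward=1.0, obs_tp1=[t + 1], done=False, weight=None)

# Sample with importance-sampling correction, then re-prioritize by |TD error|.
*batch, weights, idxes = buf.sample(batch_size=32, beta=0.4)
td_errors = [0.5] * len(idxes)  # placeholder values
buf.update_priorities(idxes, [abs(e) + 1e-6 for e in td_errors])
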
/devices/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Registry of available input devices, keyed by codename and by lowercase class name.
3 | """
4 | import warnings
5 |
6 | from devices.devices.nothing import Nothing
7 | from devices.devices.pybulletgui import PyBulletGUI
8 | from devices.devices.spacemouse import SpaceMouse
9 | from devices.devices.xboxcontroller import XBoxController
10 |
11 | # include classes in registry
12 | REGISTRY = {}
13 | for cls in [Nothing, PyBulletGUI, SpaceMouse, XBoxController]:
14 | # noinspection PyBroadException
15 | try:
16 | d = cls()
17 | REGISTRY[d.codename] = cls
18 | REGISTRY[d.clsname] = cls
19 | except Exception as e:
20 | warnings.warn('{} could not be registered due to '
21 | 'Exception: {}'.format(cls.__name__, e))
22 |
--------------------------------------------------------------------------------
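
A small sketch of how the registry above is typically consumed: look a device class up by its codename (or lowercase class name) and drive it through the start/update interface defined in devices/device.py. The scaling values are arbitrary.

# Sketch: resolve and poll a registered input device (run from the repository root).
from devices import REGISTRY

device_cls = REGISTRY['nth']                              # also registered under 'nothing'
device = device_cls(pos_scaling=0.01, orn_scaling=0.01)
device.start()                                            # connects if not already connected
device.update()                                           # poll the device once
print(device.pose)                                        # [x, y, z, rx, ry, rz]
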
/devices/action.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | 6-DOF action container with an event flag that signals when new data arrives.
4 | """
5 |
6 | from threading import Event
7 |
8 | X = 'x'
9 | Y = 'y'
10 | Z = 'z'
11 | RX = 'rx'
12 | RY = 'ry'
13 | RZ = 'rz'
14 |
15 |
16 | class Action:
17 | def __init__(self):
18 | """
19 | Base action class.
20 | """
21 | self._action = {}
22 | for name in [X, Y, Z, RX, RY, RZ]:
23 | self._action[name] = 0.0
24 | self._event = Event()
25 |
26 | def await_new(self, timeout=None):
27 | """
28 | Wait for this device to have new data.
29 | :param timeout: float or None, timeout
30 | :return: bool, True if data acquired before timeout else False
31 | """
32 | return self._event.wait(timeout)
33 |
34 | @property
35 | def has_new(self):
36 | """
37 | Get whether this device has new data.
38 | :return: bool, True if new data available else False
39 | """
40 |         return self._event.is_set()
41 |
42 | @property
43 | def x(self):
44 | """
45 | Action attribute getter.
46 | :return: float
47 | """
48 | self._event.clear()
49 | return self._action[X]
50 |
51 | @x.setter
52 | def x(self, value):
53 | """
54 | Action attribute setter.
55 | :param value: float
56 | """
57 | self._event.set()
58 | self._action[X] = float(value)
59 |
60 | @property
61 | def y(self):
62 | """
63 | Action attribute getter.
64 | :return: float
65 | """
66 | self._event.clear()
67 | return self._action[Y]
68 |
69 | @y.setter
70 | def y(self, value):
71 | """
72 | Action attribute setter.
73 | :param value: float
74 | """
75 | self._event.set()
76 | self._action[Y] = float(value)
77 |
78 | @property
79 | def z(self):
80 | """
81 | Action attribute getter.
82 | :return: float
83 | """
84 | self._event.clear()
85 | return self._action[Z]
86 |
87 | @z.setter
88 | def z(self, value):
89 | """
90 | Action attribute setter.
91 | :param value: float
92 | """
93 | self._event.set()
94 | self._action[Z] = float(value)
95 |
96 | @property
97 | def rx(self):
98 | """
99 | Action attribute getter.
100 | :return: float
101 | """
102 | self._event.clear()
103 | return self._action[RX]
104 |
105 | @rx.setter
106 | def rx(self, value):
107 | """
108 | Action attribute setter.
109 | :param value: float
110 | """
111 | self._event.set()
112 | self._action[RX] = float(value)
113 |
114 | @property
115 | def ry(self):
116 | """
117 | Action attribute getter.
118 | :return: float
119 | """
120 | self._event.clear()
121 | return self._action[RY]
122 |
123 | @ry.setter
124 | def ry(self, value):
125 | """
126 | Action attribute setter.
127 | :param value: float
128 | """
129 | self._event.set()
130 | self._action[RY] = float(value)
131 |
132 | @property
133 | def rz(self):
134 | """
135 | Action attribute getter.
136 | :return: float
137 | """
138 | self._event.clear()
139 | return self._action[RZ]
140 |
141 | @rz.setter
142 | def rz(self, value):
143 | """
144 | Action attribute setter.
145 | :param value: float
146 | """
147 | self._event.set()
148 | self._action[RZ] = float(value)
149 |
150 | @property
151 | def pose(self):
152 | """
153 | Get pose.
154 | :return: tuple
155 | """
156 | self._event.clear()
157 | return [self._action[name] for name in [X, Y, Z, RX, RY, RZ]]
158 |
159 | @pose.setter
160 | def pose(self, tup):
161 | """
162 | Set pose.
163 | :param tup: tuple
164 | """
165 | for i, name in enumerate([X, Y, Z, RX, RY, RZ]):
166 | self._action[name] = float(tup[i])
167 | self._event.set()
168 |
169 | @property
170 | def pos(self):
171 | """
172 | Get position.
173 |         :return: list
174 | """
175 | self._event.clear()
176 | return [self._action[name] for name in [X, Y, Z]]
177 |
178 | @pos.setter
179 | def pos(self, tup):
180 | """
181 | Set position.
182 | :param tup: tuple
183 | """
184 | for i, name in enumerate([X, Y, Z]):
185 | self._action[name] = float(tup[i])
186 | self._event.set()
187 |
188 | @property
189 | def orn(self):
190 | """
191 | Get orientation.
192 |         :return: list
193 | """
194 | self._event.clear()
195 | return [self._action[name] for name in [RX, RY, RZ]]
196 |
197 | @orn.setter
198 | def orn(self, tup):
199 | """
200 | Set orientation.
201 | :param tup: tuple
202 | """
203 | for i, name in enumerate([RX, RY, RZ]):
204 | self._action[name] = float(tup[i])
205 | self._event.set()
206 |
--------------------------------------------------------------------------------
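
The event flag above implements a simple new-data handshake: every setter raises the flag, every getter clears it. A short sketch with arbitrary values:

# Sketch of the new-data handshake.
from devices.action import Action

a = Action()
a.pos = (0.1, 0.0, -0.05)    # any setter raises the flag
assert a.has_new
print(a.pose)                # any getter clears it: [0.1, 0.0, -0.05, 0.0, 0.0, 0.0]
assert not a.has_new
a.await_new(timeout=0.1)     # blocks until a setter fires again, or times out
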
/devices/callbacks.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Helpers for validating and invoking user-supplied callback functions.
4 | """
5 | import inspect
6 |
7 |
8 | def handle(func, *args, **kwargs):
9 | """
10 | Handles function callbacks, typically on execution of a class method.
11 | :param func: func, callback function
12 | :param args: arguments, optional
13 | :param kwargs: keyword arguments, optional
14 | """
15 | if func is not None:
16 | try:
17 | return func(*args, **kwargs)
18 | except TypeError:
19 | return func()
20 | return
21 |
22 |
23 | def validate(func, allow_args=False, allow_return=False, require_self=False):
24 | """
25 |     Check whether a callback function is valid under the selected rules. Which
26 |     rules apply when plugging a function into this framework depends on the
27 |     specific implementation.
28 | :param func: func, function must be callable or None.
29 | :param allow_args: bool, if False, func may not have input arguments.
30 | :param allow_return: bool, if False, func may not contain a return statement.
31 |     :param require_self: bool, if True, func must take `self` as its first argument.
32 | """
33 | assert callable(func) or func is None, \
34 | 'function must be callable or None'
35 | if callable(func):
36 | if not allow_args and not require_self:
37 | signature = inspect.signature(func)
38 | assert len(signature.parameters) == 0, \
39 |             'func may not have input arguments'
40 | if not allow_return:
41 | lines, _ = inspect.getsourcelines(func)
42 | assert not lines[-1].lstrip().startswith('return'), \
43 | 'func may not contain return statement'
44 | if require_self:
45 | signature = inspect.signature(func)
46 | assert next(iter(signature.parameters)) == 'self', \
47 | 'func requires the first argument `self`'
48 |
49 |
50 |
--------------------------------------------------------------------------------
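
A brief sketch of how these helpers combine: validate vets a user-supplied callback against the chosen rules, and handle invokes it defensively. The callback itself is made up.

# Sketch: vetting and firing a callback.
from devices.callbacks import handle, validate

def printout(sender):
    print('updated by', sender)

validate(printout, allow_args=True, allow_return=True)  # raises AssertionError if the rules are broken
handle(printout, 'spacemouse')                           # prints: updated by spacemouse
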
/devices/device.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Abstract base class shared by all tele-operation input devices.
4 | """
5 |
6 | from abc import ABC
7 |
8 | from devices.action import Action
9 | from devices.pconnect import ConnectPattern
10 | from devices.pdisconnect import DisconnectPattern
11 | from devices.pupdate import UpdatePattern
12 |
13 |
14 | class InputDevice(ConnectPattern, DisconnectPattern, UpdatePattern, Action, ABC):
15 | def __init__(self, pos_scaling=1.0, orn_scaling=1.0):
16 | """
17 |         Initialize the input device. The hook methods _connect, _disconnect, and
18 |         _update are abstract and must be implemented in every device subclass.
19 | :param pos_scaling: float, scaling applied to input positions
20 | :param orn_scaling: float, scaling applied to input orientations
21 | """
22 | ConnectPattern.__init__(self)
23 | DisconnectPattern.__init__(self)
24 | UpdatePattern.__init__(self)
25 | Action.__init__(self)
26 | # Initialize properties
27 | self.action = Action()
28 | self.pos_scaling = pos_scaling
29 | self.orn_scaling = orn_scaling
30 | self._device = None
31 |
32 | @property
33 | def clsname(self):
34 | """
35 | Get the name of this device class.
36 | :return: str
37 | """
38 | return self.__class__.__name__.lower()
39 |
40 | @property
41 | def codename(self):
42 | """
43 | Get the codename of this device class.
44 |         :return: str
45 | """
46 | raise NotImplementedError
47 |
48 | def start(self):
49 | """
50 | Start this device.
51 | """
52 | if not self._is_connected:
53 | self.connect()
54 |
--------------------------------------------------------------------------------
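
To add a new tele-operation device, a subclass only needs a codename plus the three hooks inherited from the pattern classes. A hypothetical stub (not part of the repository):

# Hypothetical device stub showing the minimal surface to implement.
from devices.device import InputDevice


class KeyboardStub(InputDevice):
    @property
    def codename(self):
        return 'kbd'

    def _connect(self):
        self._is_connected = True    # open the real device here

    def _disconnect(self):
        self._is_connected = False   # close it here

    def _update(self):
        # Read the real input and publish it as a 6-DOF action.
        self.pose = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
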
/devices/devices/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/devices/devices/__init__.py
--------------------------------------------------------------------------------
/devices/devices/nothing.py:
--------------------------------------------------------------------------------
1 | """
2 | Dummy input device that emits random (or zero) actions; useful for testing.
3 | """
4 |
5 | import random
6 |
7 | from devices.device import InputDevice
8 |
9 |
10 | class Nothing(InputDevice):
11 | def __init__(self, *args, **kwargs):
12 | InputDevice.__init__(self, *args, **kwargs)
13 | self.is_random = True
14 |
15 | @property
16 | def codename(self):
17 | return 'nth'
18 |
19 | def _connect(self):
20 | self._is_connected = True
21 |
22 | def _disconnect(self):
23 | self._is_connected = False
24 |
25 | def _update(self):
26 | action = []
27 | for i in range(6):
28 | a = random.uniform(-1.0, 1.0) if self.is_random else 0.0
29 | a *= self.pos_scaling if i < 3 else self.orn_scaling
30 | action.append(a)
31 | self.pose = action
32 |
--------------------------------------------------------------------------------
/devices/devices/pybulletgui.py:
--------------------------------------------------------------------------------
1 | """
2 | Input device backed by PyBullet debug-parameter sliders.
3 | """
4 |
5 | import pybullet as p
6 |
7 | from devices.device import InputDevice
8 |
9 |
10 | class PyBulletGUI(InputDevice):
11 | def __init__(self, *args, **kwargs):
12 | InputDevice.__init__(self, *args, **kwargs)
13 |
14 | @property
15 | def codename(self):
16 | return 'pbg'
17 |
18 | def _connect(self):
19 | assert p.isConnected(), 'PyBullet not connected.'
20 | self._axis_ids = []
21 | keys = list(self._action.keys())
22 | for i in range(6):
23 | self._axis_ids.append(
24 | p.addUserDebugParameter(
25 | paramName=keys[i],
26 | rangeMin=-1,
27 | rangeMax=1,
28 | startValue=0))
29 | self._is_connected = True
30 |
31 | def _disconnect(self):
32 | pass
33 |
34 | def _update(self):
35 | action = []
36 | for i in range(6):
37 | value = p.readUserDebugParameter(self._axis_ids[i])
38 | value *= self.pos_scaling if i < 3 else self.orn_scaling
39 | action.append(value)
40 | self.pose = action
41 |
42 |
43 | if __name__ == '__main__':
44 |
45 | def printout(s):
46 | print(s.pose)
47 |
48 |
49 |     p.connect(p.GUI)
50 |     s = PyBulletGUI()
51 |     s.start()
52 |     s.on_update = printout
53 |     import time
54 | 
55 |     while True:
56 |         s.update()
57 |         time.sleep(0.1)
58 | 
--------------------------------------------------------------------------------
/devices/devices/spacemouse.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Input device backed by a 3Dconnexion SpaceMouse via pygame's joystick interface.
4 | """
5 |
6 | import pygame
7 | from pygame.constants import JOYAXISMOTION
8 |
9 | from devices.device import InputDevice
10 |
11 |
12 | class SpaceMouse(InputDevice):
13 | def __init__(self, *args, **kwargs):
14 | InputDevice.__init__(self, *args, **kwargs)
15 |
16 | @property
17 | def codename(self):
18 | return 'spm'
19 |
20 | def _connect(self):
21 | pygame.init()
22 | pygame.joystick.init()
23 | assert pygame.joystick.get_count() > 0, \
24 | 'No joystick found!'
25 | self._device = pygame.joystick.Joystick(0)
26 | self._device.init()
27 | self._is_connected = True
28 |
29 | def _disconnect(self, *args, **kwargs):
30 | pass
31 |
32 | def _update(self):
33 | action = []
34 | for event in pygame.event.get():
35 | if event.type == JOYAXISMOTION:
36 | action = [self._device.get_axis(i) for i in range(6)]
37 | if len(action) == 0:
38 | return
39 | for i in range(6):
40 | action[i] *= self.pos_scaling if i < 3 else self.orn_scaling
41 | action[0] = -action[0] # x direction is positive
42 | self.pose = tuple(action)
43 |
44 |
45 | if __name__ == '__main__':
46 |
47 | def printout(s):
48 | print(s.pose)
49 |
50 |
51 |     s = SpaceMouse()
52 |     s.start()
53 |     s.on_update = printout
54 |     import time
55 | 
56 |     while True:
57 |         s.update()
58 |         time.sleep(0.1)
59 | 
--------------------------------------------------------------------------------
/devices/devices/xboxcontroller.py:
--------------------------------------------------------------------------------
1 | """
2 | Input device backed by an Xbox controller via pygame's joystick interface.
3 | """
4 |
5 | import pygame
6 | from pygame.locals import JOYAXISMOTION, JOYBUTTONDOWN, JOYBUTTONUP
7 |
8 | from devices.device import InputDevice
9 |
10 |
11 | class XBoxController(InputDevice):
12 | def __init__(self, *args, **kwargs):
13 | InputDevice.__init__(self, *args, **kwargs)
14 |
15 | @property
16 | def codename(self):
17 | return 'xbc'
18 |
19 | def _connect(self):
20 | pygame.init()
21 | pygame.joystick.init()
22 | assert pygame.joystick.get_count() > 0, \
23 | 'No joystick found!'
24 | self._device = pygame.joystick.Joystick(0)
25 | self._device.init()
26 | self._z_directions = [1, 1]
27 | self._is_connected = True
28 |
29 | def _disconnect(self):
30 | pass
31 |
32 | def _update(self):
33 | action = []
34 | for event in pygame.event.get():
35 | if event.type == JOYBUTTONUP or event.type == JOYBUTTONDOWN:
36 | button_state = [self._device.get_button(i) for i in range(15)]
37 | self._z_directions[0] *= -1 if button_state[8] else 1
38 | self._z_directions[1] *= -1 if button_state[9] else 1
39 | if event.type == JOYAXISMOTION:
40 | action = [self._device.get_axis(i) for i in range(6)]
41 | x = action[0]
42 | y = action[1] * -1
43 | z = (action[4] + 1) / 2 * self._z_directions[0]
44 | rx = action[2]
45 | ry = action[3] * -1
46 | rz = (action[5] + 1) / 2 * self._z_directions[1]
47 | action = [x, y, z, rx, ry, rz]
48 | if len(action) == 0:
49 | return
50 | for i in range(6):
51 |             action[i] = 0.0 if abs(action[i]) < 0.001 else action[i]  # deadzone
52 | action[i] *= self.pos_scaling if i < 3 else self.orn_scaling
53 | action[i] = round(action[i], 5)
54 | self.pose = action
55 |
56 |
57 | if __name__ == '__main__':
58 |
59 | def printout(s):
60 | print(s.pose)
61 |
62 |
63 |     s = XBoxController()
64 |     s.start()
65 |     s.on_update = printout
66 |     import time
67 | 
68 |     while True:
69 |         s.update()
70 |         time.sleep(0.01)
71 | 
--------------------------------------------------------------------------------
/devices/pconnect.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from devices.callbacks import handle, validate
4 |
5 |
6 | class ConnectPattern(ABC):
7 | def __init__(self):
8 | self._on_connect = None
9 | if '_is_connected' not in self.__dict__:
10 | self._is_connected = False
11 |
12 | @property
13 | def is_connected(self):
14 | """
15 | Get connection state.
16 | :return: bool, True if connected else False.
17 | """
18 | return self._is_connected
19 |
20 | def connect(self, *args, **kwargs):
21 | """
22 | Connect method.
23 | :param args: arguments
24 | :param kwargs: keyword arguments
25 | :return: bool, result
26 | """
27 | result = self._connect(*args, **kwargs)
28 | self._is_connected = result
29 | handle(self._on_connect, self)
30 | return result
31 |
32 | @abstractmethod
33 | def _connect(self, *args, **kwargs):
34 | """
35 | Called by public method.
36 | :param args: arguments
37 | :param kwargs: keyword arguments
38 | :return: bool, result
39 | """
40 | pass
41 |
42 | @property
43 | def on_connect(self):
44 | """
45 | Get callback.
46 | :return: func, callback
47 | """
48 | return self._on_connect
49 |
50 | @on_connect.setter
51 | def on_connect(self, func):
52 | """
53 | Set callback.
54 | :param func: func, callback
55 | """
56 | validate(func, allow_args=True, allow_return=True)
57 | self._on_connect = func
58 |
--------------------------------------------------------------------------------
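
The connect, disconnect, and update classes all follow the same pattern: the public method delegates to the abstract hook and then fires an optional callback. A sketch using the Nothing device as a stand-in:

# Sketch: attaching an on_connect callback.
from devices.devices.nothing import Nothing

dev = Nothing()
dev.on_connect = lambda sender: print('connected:', sender.codename)
dev.connect()    # runs _connect(), then the callback: "connected: nth"
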
/devices/pdisconnect.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from devices.callbacks import handle, validate
4 |
5 |
6 | class DisconnectPattern(ABC):
7 | def __init__(self):
8 | self._on_disconnect = None
9 | if '_is_connected' not in self.__dict__:
10 | self._is_connected = False
11 |
12 | def disconnect(self, *args, **kwargs):
13 | """
14 | Disconnect method.
15 | :param args: arguments
16 | :param kwargs: keyword arguments
17 | :return: bool, result
18 | """
19 | result = self._disconnect(*args, **kwargs)
20 | self._is_connected = result
21 | handle(self._on_disconnect, self)
22 | return result
23 |
24 | @abstractmethod
25 | def _disconnect(self, *args, **kwargs):
26 | """
27 | Called by public method.
28 | :param args: arguments
29 | :param kwargs: keyword arguments
30 | :return: bool, result
31 | """
32 | pass
33 |
34 | @property
35 | def on_disconnect(self):
36 | """
37 | Get callback.
38 | :return: func, callback
39 | """
40 | return self._on_disconnect
41 |
42 | @on_disconnect.setter
43 | def on_disconnect(self, func):
44 | """
45 | Set callback.
46 | :param func: func, callback
47 | """
48 | validate(func, allow_args=True, allow_return=True)
49 | self._on_disconnect = func
--------------------------------------------------------------------------------
/devices/pupdate.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from devices.callbacks import handle, validate
4 |
5 |
6 | class UpdatePattern(ABC):
7 | def __init__(self):
8 | self._on_update = None
9 |
10 | def update(self, *args, **kwargs):
11 | """
12 | Update method.
13 | :param args: arguments
14 | :param kwargs: keyword arguments
15 | :return: bool, result
16 | """
17 | result = self._update(*args, **kwargs)
18 | handle(self._on_update, self)
19 | return result
20 |
21 | @abstractmethod
22 | def _update(self, *args, **kwargs):
23 | """
24 | Called by public method.
25 | :param args: arguments
26 | :param kwargs: keyword arguments
27 | :return: bool, result
28 | """
29 | pass
30 |
31 | @property
32 | def on_update(self):
33 | """
34 | Get callback.
35 | :return: func, callback
36 | """
37 | return self._on_update
38 |
39 | @on_update.setter
40 | def on_update(self, func):
41 | """
42 | Set callback.
43 | :param func: func, callback
44 | """
45 | validate(func, allow_args=True, allow_return=True)
46 | self._on_update = func
47 |
--------------------------------------------------------------------------------
/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/__init__.py
--------------------------------------------------------------------------------
/envs/robot_real_example.py:
--------------------------------------------------------------------------------
1 | import utilities as util
2 |
3 | # Modules for interfacing with robot control and the FT sensor; uncomment and point these at your own drivers.
4 | # from robot_control_interface import ControlInterface
5 | # from ft_sensor_interface import FTInterface
6 |
7 |
8 | class RobotRealExample():
9 |
10 | def __init__(self):
11 |
12 | # Connect the interfaces
13 | self.robot_interface = ControlInterface()
14 | self.ft_interface = FTInterface()
15 |
16 | @staticmethod
17 | def decompose_incoming_pose_data(data):
18 | position = data[:3]
19 | rotation = data[3:7]
20 |
21 | return [position, rotation]
22 |
23 | def get_member_pose(self):
24 | self.robot_interface.receive()
25 | data_in = self.robot_interface.message_in.values
26 | values = self.decompose_incoming_pose_data(data_in)
27 | position_m = values[0]
28 | rotation_quat = values[1]
29 |
30 | return [position_m, rotation_quat]
31 |
32 | @staticmethod
33 | def get_target_pose():
34 | # target at world origin
35 | return [0, 0, 0], util.xyzw_by_euler([0, 0, 0], 'sxyz')
36 |
37 | def get_force_torque(self):
38 | self.ft_interface.receive()
39 | data_in = self.ft_interface.message_in.values
40 | force_torque = data_in
41 |
42 | return force_torque
43 |
44 | def apply_action_pose(self, delta, done):
45 | relative_pos = delta[0:3]
46 | relative_orn = delta[3:6]
47 |
48 | data_out = list(relative_pos) + list(relative_orn) + [done]
49 |
50 | self.robot_interface.send(data_out)
51 |
52 | def apply_action_position(self, delta, done):
53 | data_out = list(delta) + [0, 0, 0] + [done]
54 |
55 | self.robot_interface.send(data_out)
56 |
--------------------------------------------------------------------------------
/envs/robot_sim_panda.py:
--------------------------------------------------------------------------------
1 | import math
2 | import pybullet as p
3 | import numpy as np
4 | import utilities as util
5 |
6 | INITIAL_POS = [0.0, 0.0, 0.0]
7 | INITIAL_ORN = [0, 0, 0, 1]
8 | TARGET_POS = [0.6, 0.044, 0]
9 | TARGET_ORN = [0, 0, 0, 1]
10 | HOLE_OFFSET = [0.048485, -0.04305, -0.03]
11 |
12 | URDF_PATH_TOOL = 'envs/urdf/panda_peg_in_hole/panda_peg'
13 | URDF_PATH_TARGET = 'envs/urdf/panda_peg_in_hole/insertion_box'
14 |
15 | # Official initial positions for Panda [0, -45, 0, -135, 0, 90, 45]
16 | INITIAL_PANDA_JOINTS = [math.radians(0), math.radians(20), math.radians(0), math.radians(-103), math.radians(0),
17 | math.radians(122), math.radians(45)]
18 |
19 |
20 | class RobotSimPanda():
21 |
22 | def __init__(self):
23 |
24 | self.uid = p.loadURDF(util.format_urdf_filepath(URDF_PATH_TOOL),
25 | basePosition=INITIAL_POS,
26 | baseOrientation=INITIAL_ORN,
27 | useFixedBase=1)
28 |
29 | self.target_uid = p.loadURDF(util.format_urdf_filepath(URDF_PATH_TARGET),
30 | basePosition=TARGET_POS,
31 | baseOrientation=TARGET_ORN,
32 | useFixedBase=1)
33 | self.link_target = 0
34 | util.display_frame_axis(self.target_uid, self.link_target)
35 |
36 |         # In the URDF, the Panda robot has 11 joints;
37 |         # 7 of them belong to the arm, each with an FT sensor.
38 | self.num_arm_joints = 7
39 |
40 | # FT sensor at the last arm joint
41 | self.link_sensor = 6
42 | util.display_frame_axis(self.uid, self.link_sensor)
43 |
44 |         # The 8th link is a virtual link at the end of the arm, used as the end effector.
45 | self.link_ee = 7
46 | util.display_frame_axis(self.uid, self.link_ee)
47 |
48 | self.link_member = 8
49 | util.display_frame_axis(self.uid, self.link_member)
50 |
51 | self.max_force = 200
52 | self.max_velocity = 0.35
53 |
54 | self.joint_positions = INITIAL_PANDA_JOINTS
55 |
56 | # Apply the joint positions
57 | for jointIndex in range(self.num_arm_joints):
58 | p.resetJointState(self.uid, jointIndex, self.joint_positions[jointIndex])
59 | p.setJointMotorControl2(self.uid, jointIndex, p.POSITION_CONTROL,
60 | targetPosition=self.joint_positions[jointIndex], force=self.max_force,
61 | maxVelocity=self.max_velocity, positionGain=0.3, velocityGain=1)
62 |
63 |         # Important: call getLinkState only once, at the beginning; otherwise a PyBullet bug makes the robot unstable.
64 | ee_pose = p.getLinkState(self.uid, self.link_ee)
65 | self.ee_position = list(ee_pose[0])
66 | self.ee_orientation = list(ee_pose[1])
67 |
68 | def get_member_pose(self):
69 | link_member_pose = p.getLinkState(self.uid, self.link_member)
70 | return [link_member_pose[0], link_member_pose[1]]
71 |
72 | def get_target_pose(self):
73 | pose = p.getLinkState(self.target_uid, self.link_target)[4:6]
74 | # Offset the hole center from corner
75 | return [(np.array(pose[0]) + HOLE_OFFSET).tolist(), pose[1]]
76 |
77 | def enable_force_torque_sensor(self):
78 | # FT sensor is measured at the center of mass in Pybullet
79 | p.enableJointForceTorqueSensor(self.uid, self.link_sensor)
80 |
81 | def get_force_torque(self):
82 | # FT reading in pybullet is negated and needs to be scaled down
83 | ft = np.multiply(-0.1, p.getJointState(self.uid, self.link_sensor)[2]).tolist()
84 | return ft
85 |
86 | def apply_action_pose(self, delta_pose):
87 | for i in range(3):
88 | self.ee_position[i] += delta_pose[i]
89 |
90 | orn = list(p.getEulerFromQuaternion(self.ee_orientation))
91 | for i in range(3):
92 | orn[i] += delta_pose[i+3]
93 | self.ee_orientation = p.getQuaternionFromEuler(orn)
94 |
95 | joint_positions = p.calculateInverseKinematics(self.uid, self.num_arm_joints, self.ee_position, self.ee_orientation)
96 |
97 | for i in range(self.num_arm_joints):
98 | p.setJointMotorControl2(bodyUniqueId=self.uid, jointIndex=i, controlMode=p.POSITION_CONTROL,
99 | targetPosition=joint_positions[i], targetVelocity=0, force=self.max_force,
100 | maxVelocity=self.max_velocity, positionGain=0.3, velocityGain=1)
101 |
102 | def apply_action_position(self, delta_pos):
103 | for i in range(3):
104 | self.ee_position[i] += delta_pos[i]
105 |
106 | joint_positions = p.calculateInverseKinematics(self.uid, self.num_arm_joints, self.ee_position, self.ee_orientation)
107 |
108 | for i in range(self.num_arm_joints):
109 | p.setJointMotorControl2(bodyUniqueId=self.uid, jointIndex=i, controlMode=p.POSITION_CONTROL,
110 | targetPosition=joint_positions[i], targetVelocity=0, force=self.max_force,
111 | maxVelocity=self.max_velocity, positionGain=0.3, velocityGain=1)
112 |
113 |
114 |
--------------------------------------------------------------------------------
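
A sketch of driving the simulated Panda for one control step. It connects PyBullet headlessly and assumes the URDF assets under envs/urdf are on disk; the delta values are arbitrary.

# Sketch: one Cartesian step against the simulated Panda.
import pybullet as p

from envs.robot_sim_panda import RobotSimPanda

p.connect(p.DIRECT)
robot = RobotSimPanda()
robot.enable_force_torque_sensor()
robot.apply_action_pose([0.001, 0.0, -0.001, 0.0, 0.0, 0.0])  # metres / radians
p.stepSimulation()
print(robot.get_member_pose())
print(robot.get_force_torque())
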
/envs/robot_sim_robotless.py:
--------------------------------------------------------------------------------
1 | import math
2 | import pybullet as p
3 | import numpy as np
4 | import transforms3d
5 | import utilities as util
6 |
7 | # lap joint task
8 | INITIAL_POS = np.array([0.0, 0.0, 0.24])
9 | INITIAL_ORN = util.mat33_to_quat(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))
10 | TARGET_POS = np.array([0, 0, 0])
11 | TARGET_ORN = np.array([0, 0, math.pi])
12 | URDF_PATH_TOOL = 'envs/urdf/robotless_lap_joint/tool'
13 | URDF_PATH_TARGET = 'envs/urdf/robotless_lap_joint/task_lap_90deg'
14 |
15 |
16 | class RobotSimRobotless():
17 |
18 | def __init__(self):
19 |
20 | self.uid = p.loadURDF(util.format_urdf_filepath(URDF_PATH_TOOL),
21 | basePosition=INITIAL_POS,
22 | baseOrientation=INITIAL_ORN,
23 | useFixedBase=0)
24 | self.link_member = 2
25 | self.link_gripper = 1
26 | self.link_sensor = 0
27 |
28 | self.target_uid = p.loadURDF(util.format_urdf_filepath(URDF_PATH_TARGET),
29 | basePosition=TARGET_POS,
30 | baseOrientation=util.xyzw_by_euler(TARGET_ORN, 'sxyz'),
31 | useFixedBase=1)
32 | self.link_target = 0
33 | util.display_frame_axis(self.target_uid, self.link_target)
34 |
35 | self.max_force = -1
36 |
37 | # set friction
38 | """
39 | p.changeDynamics(self.uid,
40 | self.link_member,
41 | lateralFriction=1.0)
42 | p.changeDynamics(self.target_uid,
43 | 1,
44 | lateralFriction=1.0)
45 | """
46 |
47 | self.base_constraint = p.createConstraint(
48 | parentBodyUniqueId=self.uid,
49 | parentLinkIndex=-1, # base index
50 | childBodyUniqueId=-1, # base index
51 | childLinkIndex=-1, # base index
52 | jointType=p.JOINT_FIXED,
53 | jointAxis=[0, 0, 0],
54 | parentFramePosition=[0, 0, 0],
55 | childFramePosition=INITIAL_POS,
56 | childFrameOrientation=INITIAL_ORN
57 | )
58 |
59 | self.base_pose = p.getBasePositionAndOrientation(self.uid)
60 | self.gripper_pose = p.getLinkState(self.uid, self.link_gripper)[4:6]
61 | self.member_pose = p.getLinkState(self.uid, self.link_member)[4:6]
62 |
63 | self.from_base_to_member = util.get_f1_to_f2_xform(self.base_pose, self.member_pose)
64 |
65 | for link_index in range(p.getNumJoints(self.uid)):
66 | util.display_frame_axis(self.uid, link_index)
67 |
68 | def get_member_pose(self):
69 | base_pose = p.getBasePositionAndOrientation(self.uid)
70 | member_pose_mat = util.transform_mat(self.from_base_to_member,
71 | util.mat44_by_pos_quat(base_pose[0], base_pose[1]))
72 | member_pose = util.mat44_to_pos_quat(member_pose_mat)
73 |
74 | return [member_pose[0], member_pose[1]]
75 |
76 | def get_target_pose(self):
77 | return p.getLinkState(self.target_uid, self.link_target)[4:6]
78 |
79 | def enable_force_torque_sensor(self):
80 | # FT sensor is measured at the center of mass in Pybullet
81 | p.enableJointForceTorqueSensor(self.uid, self.link_sensor)
82 |
83 | def get_force_torque(self):
84 | # FT reading in pybullet is negated and needs to be scaled down
85 | return np.multiply(-0.1, p.getJointState(self.uid, self.link_sensor)[2]).tolist()
86 |
87 | def apply_action_pose(self, delta_pose):
88 |         # PyBullet oddity: when driving the body via changeConstraint(), the resulting
89 |         # velocity is roughly 5x smaller than commanded, so scale the delta up to compensate.
90 | delta_pose = np.multiply(np.array(delta_pose), 5.0)
91 |
92 | relative_pos = np.array(delta_pose[0:3])
93 | base_pos, base_orn = p.getBasePositionAndOrientation(self.uid)
94 | new_pos = np.add(np.array(base_pos), relative_pos).tolist()
95 |
96 | ang_vel_quat = [0, delta_pose[3], delta_pose[4], delta_pose[5]]
97 | new_orn = np.add(base_orn, np.multiply(0.5,
98 | util.wxyz_to_xyzw(transforms3d.quaternions.qmult(ang_vel_quat, util.xyzw_to_wxyz(base_orn)))))
99 |
100 | p.changeConstraint(self.base_constraint, new_pos, new_orn, self.max_force)
101 |
102 | def apply_action_position(self, delta_pos):
103 |         # PyBullet oddity: when driving the body via changeConstraint(), the resulting
104 |         # velocity is roughly 5x smaller than commanded, so scale the delta up to compensate.
105 | delta_pos = np.multiply(np.array(delta_pos), 5.0)
106 |
107 | base_pos, base_orn = p.getBasePositionAndOrientation(self.uid)
108 | new_pos = np.add(np.array(base_pos), delta_pos).tolist()
109 |
110 | p.changeConstraint(self.base_constraint, new_pos, INITIAL_ORN, self.max_force)
111 |
112 |
113 |
--------------------------------------------------------------------------------
/envs/task.py:
--------------------------------------------------------------------------------
1 | import math
2 | from abc import ABC, abstractmethod
3 |
4 | import gym
5 | import numpy as np
6 | from gym import spaces
7 |
8 | import utilities as util
9 |
10 | WRITE_CSV = False
11 |
12 |
13 | class Task(ABC, gym.Env):
14 |
15 | def __init__(self,
16 | time_step=None,
17 | max_steps=None,
18 | step_limit=None,
19 | action_dim=None,
20 | max_vel=None,
21 | max_rad=None,
22 | ft_obs_only=None,
23 | limit_ft=None,
24 | max_ft=None,
25 | max_position_range=None,
26 | dist_threshold=None):
27 | super().__init__()
28 |
29 | self._max_step = max_steps
30 | self._step_limit = step_limit
31 | # max linear and rotational velocity command
32 | self._max_vel = max_vel
33 | self._max_rad = max_rad
34 | # only use force torque as observation
35 | self._ft_obs_only = ft_obs_only
36 |
37 | self._time_step = time_step
38 | self._observation = []
39 | self._env_step_counter = 0
40 | self._num_success = 0
41 |
42 | self._limit_force_torque = limit_ft
43 | self._max_force_torque = max_ft
44 | self._force_torque_violations = [0.0] * len(self._max_force_torque)
45 | self._ft_range_ratio = 1
46 |
47 | """ Define Gym Spaces for observations and actions """
48 | self._max_pos_range = max_position_range
49 | if self._ft_obs_only: # no pose observation
50 | self.observation_dim = len(self._max_force_torque)
51 | observation_high = np.array(self._max_force_torque)
52 | observation_low = - observation_high
53 | elif action_dim == 6: # 6 DOF
54 | self.observation_dim = 7 + len(self._max_force_torque)
55 | observation_orn_high = [1] * 4
56 | observation_high = np.array(self._max_pos_range + observation_orn_high + self._max_force_torque)
57 | observation_low = -observation_high
58 | else: # 3 DOF
59 | self.observation_dim = 3 + len(self._max_force_torque)
60 | observation_high = np.array(self._max_pos_range + self._max_force_torque)
61 | observation_low = - observation_high
62 | self.observation_space = spaces.Box(observation_low, observation_high)
63 |
64 | self._action_bound = 1
65 | action_high = np.array([self._action_bound] * action_dim)
66 | self.action_space = spaces.Box(-action_high, action_high)
67 | self.action_dim = action_dim
68 |
69 | self.member_pose = []
70 | self.force_torque = []
71 |
72 | self.dist_threshold = dist_threshold
73 |
74 | # csv headers
75 | if WRITE_CSV:
76 | util.write_csv(["step_member_pose", "pos_X", "pos_Y", "pos_Z", "qX", "qY", "qZ", "qW"], 'member_pose.csv',
77 | True)
78 | util.write_csv(["step_ft", "Fx", "Fy", "Fz", "Tx", "Ty", "Tz"], 'ft_reading.csv', True)
79 | if self.action_dim == 3:
80 | util.write_csv(["step_actions", "vel_X", "vel_Y", "vel_Z"], 'data_out.csv', True)
81 | else:
82 | util.write_csv(["step_actions", "vel_X", "vel_Y", "vel_Z", "rot_vel_X", "rot_vel_Y", "rot_vel_Z"],
83 | 'data_out.csv', True)
84 |
85 | @abstractmethod
86 | def reset(self):
87 | pass
88 |
89 | def pos_dist_to_target(self):
90 | self.member_pose = self.get_member_pose()
91 |
92 | # log
93 | if WRITE_CSV:
94 | util.write_csv([self._env_step_counter] + self.member_pose[0] + self.member_pose[1], 'member_pose.csv',
95 | False)
96 |
97 | member_pos = list(self.member_pose[0])
98 | target_pose = self.get_target_pose()
99 | target_pos = list(target_pose[0])
100 |
101 | dist_pos = np.linalg.norm(np.subtract(member_pos, target_pos)) # linear dist in m
102 | # util.prGreen("pos dist: {}".format(dist_pos))
103 |
104 | return dist_pos
105 |
106 | def orn_dist_to_target(self):
107 | member_orn = list(self.member_pose[1])
108 | target_pose = self.get_target_pose()
109 | target_orn = list(target_pose[1])
110 |
111 | dist_orn = math.fabs(2 * math.acos(math.fabs(np.dot(member_orn, target_orn))) - math.pi) # angle diff in rad
112 | # util.prGreen("orn dist: {}".format(dist_orn))
113 |
114 | return dist_orn
115 |
116 | def dist_to_target(self):
117 | dist_pos = self.pos_dist_to_target()
118 | dist = dist_pos
119 |
120 | if self.action_dim > 3:
121 | dist_orn = self.orn_dist_to_target()
122 | dist = dist_pos + 0.05 * dist_orn
123 |
124 | # util.prGreen("dist: {}".format(dist))
125 | return dist
126 |
127 | def get_extended_observation(self):
128 | self._observation = []
129 |
130 | if not self._ft_obs_only:
131 | if self.action_dim > 3:
132 | pos, orn = self.member_pose[0], self.member_pose[1]
133 | self._observation.extend(pos)
134 | self._observation.extend(orn)
135 | else:
136 | pos = self.member_pose[0]
137 | self._observation.extend(pos)
138 |
139 | self.force_torque = self.get_force_torque()
140 |
141 | if WRITE_CSV:
142 | util.write_csv([self._env_step_counter] + self.force_torque, 'ft_reading.csv', False)
143 |
144 | if self._limit_force_torque:
145 | self.check_ft_limit(self.force_torque)
146 | # if self._force_torque_violations != [0]*len(self.force_torque):
147 | # util.prRed(self._force_torque_violations)
148 |
149 | self._observation.extend(self.force_torque)
150 |
151 | return self._observation
152 |
153 | def step(self, action):
154 | if len(action) > 3:
155 | delta_lin = np.array(action[0:3]) * self._max_vel * self._time_step
156 | delta_rot = np.array(action[3:6]) * self._max_rad * self._time_step
157 | delta = np.append(delta_lin, delta_rot)
158 | else:
159 | delta = np.array(action) * self._max_vel * self._time_step
160 |
161 | if self._limit_force_torque:
162 | self.constrain_velocity_for_ft(delta)
163 |
164 | if WRITE_CSV:
165 | util.write_csv([self._env_step_counter] + list(delta), 'data_out.csv', False)
166 |
167 | return self.step2(delta)
168 |
169 | # To be overridden
170 | def step2(self, delta):
171 | self._env_step_counter += 1
172 | reward, done, num_success = self.reward()
173 | self._observation = self.get_extended_observation()
174 |
175 | return np.array(self._observation), reward, done, {"num_success": num_success}
176 |
177 | def reward(self):
178 |
179 | done = False
180 |
181 | dist = self.dist_to_target()
182 | reward_dist = - dist
183 |
184 | reward_ft = 0
185 | # reward_ft = self.reward_force_torque()
186 | reward = reward_dist + 0.05 * reward_ft
187 | # util.prRed(reward)
188 |
189 | if dist < self.dist_threshold:
190 | done = True
191 | reward += 1000
192 | util.prGreen("SUCCESS with " + str(self._env_step_counter) + " steps")
193 | self._num_success = 1
194 |
195 | if self._step_limit and self._env_step_counter > self._max_step:
196 | done = True
197 | util.prRed("FAILED")
198 | self._num_success = 0
199 |
200 | return reward, done, self._num_success
201 |
202 | """ Rewards and constraints based on FT limits """
203 |
204 | def reward_force_torque(self):
205 |
206 | ft_contact_limit = np.multiply(self._max_force_torque, 0.5) # define contact limit ft as % of max
207 | # util.prGreen('force torque: {}'.format(self.force_torque))
208 | indices = Task.check_list_bounds(self.force_torque, ft_contact_limit)
209 | # util.prGreen(indices)
210 |
211 | max_ft = 0
212 | for i in range(len(indices)):
213 | ft_excess_ratio = (indices[i] * self.force_torque[i] - ft_contact_limit[i]) / self._max_force_torque[i]
214 |             if indices[i] != 0 and max_ft < ft_excess_ratio:
215 | max_ft = ft_excess_ratio
216 | reward_ft = - max_ft
217 |
218 | # experiment with discouraging FT overload with negative reward
219 |         if self._limit_force_torque and self._force_torque_violations != [0.0] * len(self._force_torque_violations):
220 | reward_ft -= 5
221 |
222 | # util.prRed('reward ft: {}'.format(reward_ft))
223 | return reward_ft
224 |
225 | @staticmethod
226 | def check_list_bounds(l, l_bounds):
227 | assert len(l) == len(l_bounds)
228 | index_list = [0] * len(l)
229 | for i in range(len(l)):
230 | if math.fabs(l[i]) >= l_bounds[i]:
231 | # util.prGreen("index {}: {}".format(i,l[i]))
232 | index_list[i] = np.sign(l[i])
233 |
234 | # in the form of [0, 0, 1, 0, -1, 0]
235 | return index_list
236 |
237 | def check_ft_limit(self, force_torque):
238 | # check force torque against limits
239 | self._force_torque_violations = Task.check_list_bounds(force_torque,
240 | np.multiply(self._ft_range_ratio,
241 | self._max_force_torque))
242 |
243 | def constrain_velocity_for_ft(self, velocity):
244 | # stop (only) linear motion along the direction where force torque limit is violated
245 |
246 | force_list = self._force_torque_violations[0:3]
247 | torque_list = self._force_torque_violations[3:6]
248 | lin_vel = velocity[0:3]
249 | axes = ['x', 'y', 'z']
250 |
251 | for i in range(len(force_list)):
252 |             if force_list[i] != 0 and np.sign(force_list[i]) != np.sign(lin_vel[i]):
253 | lin_vel[i] = -0.1 * lin_vel[i]
254 |
255 | for i in range(len(torque_list)):
256 | if torque_list[i] != 0:
257 | for j in range(len(torque_list)):
258 | if j != i:
259 | lin_vel[j] = 0.0
260 |
261 | velocity[0:3] = lin_vel
262 |
263 | return velocity
264 |
--------------------------------------------------------------------------------
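
A worked example of the static bounds check used for the force-torque limits above: each entry is 0 while the reading stays inside its bound, otherwise the sign of the violating reading. The readings and bounds below are arbitrary.

# Worked example of Task.check_list_bounds.
from envs.task import Task

ft_reading = [3.0, -12.0, 1.0, 0.1, -0.4, 0.0]   # Fx, Fy, Fz, Tx, Ty, Tz
ft_bounds = [10.0, 10.0, 10.0, 0.3, 0.3, 0.3]
print(Task.check_list_bounds(ft_reading, ft_bounds))
# -> [0, -1.0, 0, 0, -1.0, 0]
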
/envs/task_real.py:
--------------------------------------------------------------------------------
1 | # Task setup for working with the real robot and sensor
2 |
3 | import numpy as np
4 |
5 | from envs.task import Task
6 |
7 |
8 | class TaskReal(Task):
9 |
10 | def __init__(self,
11 | env_robot=None,
12 | time_step=None,
13 | max_steps=None,
14 | step_limit=None,
15 | action_dim=None,
16 | max_vel=None,
17 | max_rad=None,
18 | ft_obs_only=None,
19 | limit_ft=None,
20 | max_ft=None,
21 | max_position_range=None,
22 | dist_threshold=None):
23 |
24 | super().__init__(max_steps=max_steps,
25 | action_dim=action_dim,
26 | step_limit=step_limit,
27 | max_vel=max_vel,
28 | max_rad=max_rad,
29 | ft_obs_only=ft_obs_only,
30 | limit_ft=limit_ft,
31 | time_step=time_step,
32 | max_ft=max_ft,
33 | max_position_range=max_position_range,
34 | dist_threshold=dist_threshold)
35 |
36 | self.env = env_robot()
37 |
38 | def reset(self):
39 | self.max_dist = self.dist_to_target()
40 | self._env_step_counter = 0
41 | self._observation = self.get_extended_observation()
42 |
43 | return np.array(self._observation)
44 |
45 | def get_member_pose(self):
46 | return self.env.get_member_pose()
47 |
48 | def get_target_pose(self):
49 | return self.env.get_target_pose()
50 |
51 | def get_force_torque(self):
52 | return self.env.get_force_torque()
53 |
54 | def step2(self, delta):
55 | reward, done, num_success = self.reward()
56 |
57 | if done:
58 | if self.action_dim > 3:
59 | last_delta = [0.0] * 6
60 | self.env.apply_action_pose(last_delta, 1)
61 | else:
62 | last_delta = [0.0] * 3
63 | self.env.apply_action_position(last_delta, 1)
64 | else:
65 | if self.action_dim > 3:
66 | self.env.apply_action_pose(delta, 0)
67 | else:
68 | self.env.apply_action_position(delta, 0)
69 |
70 | self._env_step_counter += 1
71 |
72 | self._observation = self.get_extended_observation()
73 |
74 | return np.array(self._observation), reward, done, {"num_success": num_success}
75 |
--------------------------------------------------------------------------------
/envs/task_sim.py:
--------------------------------------------------------------------------------
1 | import pybullet as p
2 | import time
3 |
4 | import numpy as np
5 | from gym.utils import seeding
6 |
7 | from envs.task import Task
8 |
9 |
10 | class TaskSim(Task):
11 |
12 | def __init__(self,
13 | env_robot=None,
14 | self_collision_enabled=None,
15 | renders=None,
16 | ft_noise=None,
17 | pose_noise=None,
18 | action_noise=None,
19 | physical_noise=None,
20 | time_step=None,
21 | max_steps=None,
22 | step_limit=None,
23 | action_dim=None,
24 | max_vel=None,
25 | max_rad=None,
26 | ft_obs_only=None,
27 | limit_ft=None,
28 | max_ft=None,
29 | max_position_range=None,
30 | dist_threshold=None):
31 |
32 | super().__init__(max_steps=max_steps,
33 | action_dim=action_dim,
34 | step_limit=step_limit,
35 | max_vel=max_vel,
36 | max_rad=max_rad,
37 | ft_obs_only=ft_obs_only,
38 | limit_ft=limit_ft,
39 | time_step=time_step,
40 | max_ft=max_ft,
41 | max_position_range=max_position_range,
42 | dist_threshold=dist_threshold)
43 |
44 | self._env_robot = env_robot
45 | self._self_collision_enabled = self_collision_enabled
46 | self._renders = renders
47 |
48 | """ parameters to control the level of domain randomization """
49 | self._ft_noise = ft_noise
50 | self._ft_noise_level = [0.5, 0.5, 0.5, 0.05, 0.05, 0.05] # N N N Nm Nm Nm
51 | self._ft_bias_level = [2.0, 2.0, 2.0, 0.2, 0.2, 0.2] # N N N Nm Nm Nm
52 | self._ft_bias = 0.0
53 | self._pose_noise = pose_noise
54 | self._pos_noise_level = 0.001 # m
55 | self._orn_noise_level = 0.001 # rad
56 | self._action_noise = action_noise
57 | self._action_noise_lin = 0.001 # multiplier for linear translation
58 | self._action_noise_rot = 0.001 # multiplier for rotation
59 | self._physical_noise = physical_noise
60 | self._friction_noise_level = 0.1
61 |
62 | if self._renders:
63 | cid = p.connect(p.SHARED_MEMORY)
64 | if (cid < 0):
65 | cid = p.connect(p.GUI)
66 | p.resetDebugVisualizerCamera(0.6, 180, -41, [0, 0, 0])
67 | else:
68 | p.connect(p.DIRECT)
69 |
70 | # p.configureDebugVisualizer(p.COV_ENABLE_GUI, 0)
71 |
72 | self.seed()
73 |
74 | def reset(self):
75 | p.resetSimulation()
76 | p.setPhysicsEngineParameter(numSolverIterations=150, enableFileCaching=0)
77 | p.setTimeStep(self._time_step)
78 | p.setGravity(0, 0, 0)
79 |
80 | self.env = self._env_robot()
81 | self.max_dist = self.dist_to_target()
82 |
83 | self._env_step_counter = 0
84 | self.env.enable_force_torque_sensor()
85 | p.stepSimulation()
86 |
87 | self.correlated_noise()
88 |
89 | self._observation = self.get_extended_observation()
90 | self.add_observation_noise()
91 |
92 | return np.array(self._observation)
93 |
94 | def __del__(self):
95 | p.disconnect()
96 |
97 | def seed(self, seed=None):
98 | self.np_random, seed = seeding.np_random(seed)
99 | return [seed]
100 |
101 | def get_member_pose(self):
102 | return self.env.get_member_pose()
103 |
104 | def get_target_pose(self):
105 | return self.env.get_target_pose()
106 |
107 | def get_force_torque(self):
108 | return self.env.get_force_torque()
109 |
110 | def step2(self, delta):
111 | reward, done, num_success = self.reward()
112 |
113 | if not done:
114 | if self.action_dim > 3:
115 | self.env.apply_action_pose(delta)
116 | else:
117 | self.env.apply_action_position(delta)
118 | else:
119 | if self.action_dim > 3:
120 | self.env.apply_action_pose([0.0] * 6)
121 | else:
122 | self.env.apply_action_position([0.0] * 3)
123 |
124 | p.stepSimulation()
125 | self._env_step_counter += 1
126 |
127 | if self._renders:
128 | time.sleep(self._time_step)
129 |
130 | self._observation = self.get_extended_observation()
131 | self.add_observation_noise()
132 |
133 | return np.array(self._observation), reward, done, {"num_success": num_success}
134 |
135 | def correlated_noise(self):
136 | # force torque sensor bias, sampled at the start of each episode
137 | if self._ft_noise:
138 | self._ft_bias = self.add_gaussian_noise(0.0, self._ft_bias_level, [0] * len(self._ft_bias_level))
139 |
140 | # add some physical noise here
141 | if self._physical_noise:
142 | self.add_all_friction_noise(self._friction_noise_level)
143 |
144 | def uncorrelated_pose_noise(self, pos, orn):
145 | if self._pose_noise:
146 | pos = self.add_gaussian_noise(0.0, self._pos_noise_level, pos)
147 | orn_euler = p.getEulerFromQuaternion(orn)
148 | orn_euler = self.add_gaussian_noise(0.0, self._orn_noise_level, orn_euler)
149 | orn = p.getQuaternionFromEuler(orn_euler)
150 |
151 | return pos, orn
152 |
153 | def uncorrelated_position_noise(self, pos):
154 | if self._pose_noise:
155 | pos = self.add_gaussian_noise(0.0, self._pos_noise_level, pos)
156 |
157 | return pos
158 |
159 | def add_ft_noise(self, force_torque):
160 | if self._ft_noise:
161 | force_torque = self.add_gaussian_noise(0.0, self._ft_noise_level, force_torque)
162 | force_torque = np.add(force_torque, self._ft_bias).tolist()
163 |
164 | return force_torque
165 |
166 | def add_observation_noise(self):
167 | if not self._ft_obs_only:
168 | if self.action_dim > 3:
169 | self._observation[0:3], self._observation[3:7] = self.uncorrelated_pose_noise(self._observation[0:3],
170 | self._observation[3:7])
171 | self._observation[7:13] = self.add_ft_noise(self._observation[7:13])
172 | else:
173 | self._observation[0:3] = self.uncorrelated_position_noise(self._observation[0:3])
174 | self._observation[3:9] = self.add_ft_noise(self._observation[3:9])
175 | else:
176 | self._observation[0:6] = self.add_ft_noise(self._observation[0:6])
177 |
178 | def add_all_friction_noise(self, noise_level):
179 | self.add_body_friction_noise(self.env.target_uid, self.env.link_target, noise_level)
180 | self.add_body_friction_noise(self.env.uid, self.env.link_member, noise_level)
181 |
182 | @staticmethod
183 | def add_body_friction_noise(uid, link, noise_level):
184 | dynamics = p.getDynamicsInfo(uid, link)
185 | noise_range = np.fabs(dynamics[1]) * noise_level
186 | friction_noise = np.random.normal(0, noise_range)
187 | p.changeDynamics(uid, link, lateralFriction=dynamics[1] + friction_noise)
188 |
189 | @staticmethod
190 | def add_gaussian_noise(mean, std, vec):
191 | noise = np.random.normal(mean, std, np.shape(vec))
192 | return np.add(vec, noise).tolist()
193 |
--------------------------------------------------------------------------------
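
Pulling the pieces together, a random-action rollout against the simulated lap-joint task. The keyword values below are illustrative only (the project's real settings live in hyper_parameters/assembly_apex_ddpg.yaml and envs_launcher.py), and the URDF assets under envs/urdf must be available.

# Illustrative rollout with made-up parameters.
from envs.robot_sim_robotless import RobotSimRobotless
from envs.task_sim import TaskSim

env = TaskSim(env_robot=RobotSimRobotless,
              renders=False,
              time_step=1.0 / 240.0,
              max_steps=500, step_limit=True,
              action_dim=6, max_vel=0.01, max_rad=0.01,
              ft_obs_only=False, limit_ft=False,
              max_ft=[100.0, 100.0, 100.0, 10.0, 10.0, 10.0],
              max_position_range=[1.0, 1.0, 1.0],
              dist_threshold=0.002)

obs = env.reset()
for _ in range(20):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        break
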
/envs/urdf/panda_peg_in_hole/insertion_box.urdf:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/panda_hand.urdf:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/panda_peg.urdf:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/finger.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/finger.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/hand.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/hand.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/insertion_box.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/insertion_box.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/link0.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link0.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/link1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link1.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/link2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link2.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/link3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link3.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/link4.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link4.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/link5.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link5.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/link6.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link6.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/link7.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/link7.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/collision/peg.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/collision/peg.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/visual/insertion_box.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/visual/insertion_box.stl
--------------------------------------------------------------------------------
/envs/urdf/panda_peg_in_hole/stl/visual/peg.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/panda_peg_in_hole/stl/visual/peg.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_1.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_2.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_3.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_visual.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_0mm_visual.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_1.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_2.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_3.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_visual.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_1mm_visual.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_1.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_2.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_3.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_visual.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_2mm_visual.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_1.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_2.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_3.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_visual.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/task_lap_90deg_3mm_visual.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_1.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_2.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_3.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_4.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_4.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_5.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_5.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_6.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_6.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_7.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_7.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_8.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_collision_8.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_gripper_visual.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_gripper_visual.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_collision_1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_collision_1.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_collision_2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_collision_2.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_collision_3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_collision_3.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_visual.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_lap_90deg_0mm_visual.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_sensor_collision_1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_sensor_collision_1.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_sensor_collision_2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_sensor_collision_2.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_sensor_collision_3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_sensor_collision_3.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/stl/tool_sensor_visual.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutodeskRoboticsLab/RLRoboticAssembly/7f72266f626de53762482310f92f6b5c817085b3/envs/urdf/robotless_lap_joint/stl/tool_sensor_visual.stl
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/task_lap_90deg.urdf:
--------------------------------------------------------------------------------
[URDF markup for the lap-joint task was not captured in this text export; see task_lap_90deg.urdf at the repository path above.]
--------------------------------------------------------------------------------
/envs/urdf/robotless_lap_joint/tool.urdf:
--------------------------------------------------------------------------------
[URDF markup for the tool was not captured in this text export; see tool.urdf at the repository path above.]
--------------------------------------------------------------------------------
/envs_launcher.py:
--------------------------------------------------------------------------------
1 | from envs.robot_sim_robotless import RobotSimRobotless
2 | from envs.robot_sim_panda import RobotSimPanda
3 | from envs.robot_real_example import RobotRealExample
4 |
5 | t = 'sim' # sim or real?
6 |
7 | if t == 'sim':
8 | from envs.task_sim import TaskSim
9 | def env_creator(env_config):
10 | environment = TaskSim(env_robot=RobotSimRobotless, # choose the sim robot class
11 | self_collision_enabled=True, # collision setting for pybullet
12 |                               renders=True, # True when testing the sim or rolling out with the GUI; set to False for training.
13 | ft_noise=False, # domain randomization on force/torque observation
14 | pose_noise=False, # domain randomization on pose observation
15 | action_noise=False, # domain randomization on actions
16 | physical_noise=False, # domain randomization on physical parameters
17 | time_step=1/250, # sets the control frequency of the robot
18 | max_steps=4000, # max number of steps in each episode
19 | step_limit=True, # limit the length of an episode by max_steps?
20 | action_dim=6, # dimension of action space
21 | max_vel=0.01, # max linear velocity (m/s) along each axis
22 | max_rad=0.01, # max rotational velocity (rad/s) around each axis
23 | ft_obs_only=False, # only use force/torque as observation?
24 | limit_ft=False, # limit force/torque based on max_ft?
25 | max_ft=[1000, 1000, 2500, 100, 100, 100], # max force (N) and torque (Nm)
26 | max_position_range=[2]*3, # max observation space for positions (m)
27 | dist_threshold=0.005) # an episode is considered successful when distance is within the threshold.
28 |
29 | return environment
30 |
31 |
32 | if t == 'real':
33 | from envs.task_real import TaskReal
34 | def env_creator(env_config):
35 | environment = TaskReal(env_robot=RobotRealExample, # choose the real robot class
36 | time_step=1/250,
37 | max_steps=4000,
38 | step_limit=True,
39 | action_dim=6,
40 | max_vel=0.01,
41 | max_rad=0.01,
42 | ft_obs_only=False,
43 | limit_ft=False,
44 | max_ft=[667.233, 667.233, 2001.69, 67.7908, 67.7908, 67.7908], # ATI-Delta FT sensor limits
45 | max_position_range=[2] * 3,
46 | dist_threshold=0.005)
47 |
48 | return environment
49 |
50 |
51 |
--------------------------------------------------------------------------------
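
A minimal smoke-test sketch (not a file in the repository): it assumes a pybullet GUI can be opened on this machine and that TaskSim follows the gym-style reset/step interface used elsewhere in the repo (run.py, rollout.py). It builds the simulated robotless task exactly as configured above and steps it with zero actions.

import numpy as np
import envs_launcher

env = envs_launcher.env_creator(env_config={})       # returns a TaskSim instance, since t == 'sim' above
obs = env.reset()
for _ in range(10):
    obs, reward, done, info = env.step(np.zeros(6))  # action_dim is 6 in the config above
    if done:
        break
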
/hyper_parameters/assembly_apex_ddpg.yaml:
--------------------------------------------------------------------------------
1 | assembly-apex-ddpg:
2 |
3 | env: ROBOTIC_ASSEMBLY
4 | run: APEX_DDPG
5 |
6 | checkpoint_freq: 1 # the frequency at which to store checkpoints
7 | checkpoint_at_end: True # store the last checkpoint?
8 |
9 | local_dir: "~/workspace/ray_results" # directory to store checkpoints
10 |
11 | stop:
12 | successful_rate: 0.9
13 | # training_iteration: 500 # stop training at a predefined number of iterations
14 | config:
15 | use_huber: True
16 | clip_rewards: False
17 |         num_workers: 5 # should be greater than 1 and less than half the number of logical CPU cores
18 | n_step: 3
19 |
20 | target_network_update_freq: 50000
21 |         buffer_size: 2000000 # total replay capacity, split evenly across the replay buffer shards
22 | prioritized_replay_alpha: 0.5
23 | sample_batch_size: 50
24 | train_batch_size: 512
25 | min_iter_time_s: 10
26 |
27 | actor_hiddens: [256, 256]
28 | critic_hiddens: [256, 256]
29 |
30 | parameter_noise: False
31 | batch_mode: "truncate_episodes" # "complete_episodes" or "truncate_episodes"
32 |
33 | # For APEX-DDPG, tau == 1 has the best performance.
34 | # The algorithm uses target_network_update_freq to update.
35 | tau: 1.0
36 |
37 | observation_filter: "MeanStdFilter"
38 |
39 | optimizer:
40 | num_replay_buffer_shards: 6 # the number of replay buffers used
41 | human_demonstration: True # use human demonstration in training?
42 |             human_data_dir: "human_demo_data/lap" # used only when human_demonstration is True
43 |
--------------------------------------------------------------------------------
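
With the repository root as the working directory, this configuration is typically launched through train.py's -f/--config-file flag, e.g. python train.py -f hyper_parameters/assembly_apex_ddpg.yaml. Note that successful_rate under stop is not a built-in RLlib metric; it is the custom metric produced by the callbacks in train.py, so training stops once the mean per-episode success reaches 0.9.
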
/rollout.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import argparse
8 | import collections
9 | import json
10 | import os
11 | import pickle
12 |
13 | import gym
14 | import ray
15 | from ray.rllib.agents.registry import get_agent_class
16 | from ray.rllib.env import MultiAgentEnv
17 | from ray.rllib.env.base_env import _DUMMY_AGENT_ID
18 | from ray.rllib.evaluation.episode import _flatten_action
19 | from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID
20 | from ray.tune.util import merge_dicts
21 |
22 | from ray.tune.registry import register_env
23 | import envs_launcher as el
24 |
25 | EXAMPLE_USAGE = """
26 | Example Usage via RLlib CLI:
27 | rllib rollout /tmp/ray/checkpoint_dir/checkpoint-0 --run DQN
28 | --env CartPole-v0 --steps 1000000 --out rollouts.pkl
29 |
30 | Example Usage via executable:
31 | ./rollout.py /tmp/ray/checkpoint_dir/checkpoint-0 --run DQN
32 | --env CartPole-v0 --steps 1000000 --out rollouts.pkl
33 | """
34 |
35 | # Note: if you use any custom models or envs, register them here first, e.g.:
36 | #
37 | # ModelCatalog.register_custom_model("pa_model", ParametricActionsModel)
38 | # register_env("pa_cartpole", lambda _: ParametricActionCartpole(10))
39 |
40 |
41 | def create_parser(parser_creator=None):
42 | parser_creator = parser_creator or argparse.ArgumentParser
43 | parser = parser_creator(
44 | formatter_class=argparse.RawDescriptionHelpFormatter,
45 | description="Roll out a reinforcement learning agent "
46 | "given a checkpoint.",
47 | epilog=EXAMPLE_USAGE)
48 |
49 | parser.add_argument(
50 | "checkpoint", type=str, help="Checkpoint from which to roll out.")
51 | required_named = parser.add_argument_group("required named arguments")
52 | # required_named.add_argument(
53 | # "--run",
54 | # type=str,
55 | # required=True,
56 | # help="The algorithm or model to train. This may refer to the name "
57 |     #          "of a built-in algorithm (e.g. RLlib's DQN or PPO), or a "
58 | # "user-defined trainable function or class registered in the "
59 | # "tune registry.")
60 | required_named.add_argument(
61 | "--env", type=str, help="The gym environment to use.")
62 | parser.add_argument(
63 | "--no-render",
64 | default=False,
65 | action="store_const",
66 | const=True,
67 |         help="Suppress rendering of the environment.")
68 | parser.add_argument(
69 | "--steps", default=10000, help="Number of steps to roll out.")
70 | parser.add_argument(
71 | "--episodes",
72 | default=1,
73 | type=int,
74 | help="Number of episodes to roll out.")
75 | parser.add_argument("--out", default=None, help="Output filename.")
76 | parser.add_argument(
77 | "--config",
78 | default="{}",
79 | type=json.loads,
80 | help="Algorithm-specific configuration (e.g. env, hyperparams). "
81 |         "Suppresses loading of configuration from checkpoint.")
82 | return parser
83 |
84 |
85 | def run(args, parser):
86 | config = {}
87 | # Load configuration from file
88 | config_dir = os.path.dirname(args.checkpoint)
89 | config_path = os.path.join(config_dir, "params.pkl")
90 | if not os.path.exists(config_path):
91 | config_path = os.path.join(config_dir, "../params.pkl")
92 | if not os.path.exists(config_path):
93 | if not args.config:
94 | raise ValueError(
95 | "Could not find params.pkl in either the checkpoint dir or "
96 | "its parent directory.")
97 | else:
98 | with open(config_path, "rb") as f:
99 | config = pickle.load(f)
100 | if "num_workers" in config:
101 | config["num_workers"] = min(2, config["num_workers"])
102 | config = merge_dicts(config, args.config)
103 | if not args.env:
104 | if not config.get("env"):
105 | parser.error("the following arguments are required: --env")
106 | args.env = config.get("env")
107 |
108 | # remove unnecessary parameters
109 | if "num_workers" in config:
110 | del config["num_workers"]
111 | if "human_data_dir" in config["optimizer"]:
112 | del config["optimizer"]["human_data_dir"]
113 | if "human_demonstration" in config["optimizer"]:
114 | del config["optimizer"]["human_demonstration"]
115 | if "multiple_human_data" in config["optimizer"]:
116 | del config["optimizer"]["multiple_human_data"]
117 | if "num_replay_buffer_shards" in config["optimizer"]:
118 | del config["optimizer"]["num_replay_buffer_shards"]
119 | if "demonstration_zone_percentage" in config["optimizer"]:
120 | del config["optimizer"]["demonstration_zone_percentage"]
121 | if "dynamic_experience_replay" in config["optimizer"]:
122 | del config["optimizer"]["dynamic_experience_replay"]
123 | if "robot_demo_path" in config["optimizer"]:
124 | del config["optimizer"]["robot_demo_path"]
125 |
126 | ray.init()
127 |
128 | # cls = get_agent_class(args.run)
129 | # agent = cls(env=args.env, config=config)
130 |
131 | cls = get_agent_class("DDPG")
132 | agent = cls(env="ROBOTIC_ASSEMBLY", config=config)
133 |
134 | agent.restore(args.checkpoint)
135 | num_steps = int(args.steps)
136 | num_episodes = int(args.episodes)
137 | rollout(agent, args.env, num_steps, num_episodes, args.out)
138 |
139 |
140 | class DefaultMapping(collections.defaultdict):
141 | """default_factory now takes as an argument the missing key."""
142 |
143 | def __missing__(self, key):
144 | self[key] = value = self.default_factory(key)
145 | return value
146 |
147 |
148 | def default_policy_agent_mapping(unused_agent_id):
149 | return DEFAULT_POLICY_ID
150 |
151 |
152 | def rollout(agent, env_name, num_steps, num_episodes, out=None):
153 | policy_agent_mapping = default_policy_agent_mapping
154 |
155 | if hasattr(agent, "workers"):
156 | env = agent.workers.local_worker().env
157 | multiagent = isinstance(env, MultiAgentEnv)
158 | if agent.workers.local_worker().multiagent:
159 | policy_agent_mapping = agent.config["multiagent"][
160 | "policy_mapping_fn"]
161 |
162 | policy_map = agent.workers.local_worker().policy_map
163 | state_init = {p: m.get_initial_state() for p, m in policy_map.items()}
164 | use_lstm = {p: len(s) > 0 for p, s in state_init.items()}
165 | action_init = {
166 | p: _flatten_action(m.action_space.sample())
167 | for p, m in policy_map.items()
168 | }
169 | else:
170 | env = gym.make(env_name)
171 | multiagent = False
172 | use_lstm = {DEFAULT_POLICY_ID: False}
173 |
174 | if out is not None:
175 | rollouts = []
176 | steps = 0
177 | episodes = 0
178 | while steps < (num_steps or steps + 1) and (episodes < num_episodes):
179 | mapping_cache = {} # in case policy_agent_mapping is stochastic
180 | if out is not None:
181 | rollout = []
182 | obs = env.reset()
183 | agent_states = DefaultMapping(
184 | lambda agent_id: state_init[mapping_cache[agent_id]])
185 | prev_actions = DefaultMapping(
186 | lambda agent_id: action_init[mapping_cache[agent_id]])
187 | prev_rewards = collections.defaultdict(lambda: 0.)
188 | done = False
189 | reward_total = 0.0
190 | while not done and steps < (num_steps or steps + 1):
191 | multi_obs = obs if multiagent else {_DUMMY_AGENT_ID: obs}
192 | action_dict = {}
193 | for agent_id, a_obs in multi_obs.items():
194 | if a_obs is not None:
195 | policy_id = mapping_cache.setdefault(
196 | agent_id, policy_agent_mapping(agent_id))
197 | p_use_lstm = use_lstm[policy_id]
198 | if p_use_lstm:
199 | a_action, p_state, _ = agent.compute_action(
200 | a_obs,
201 | state=agent_states[agent_id],
202 | prev_action=prev_actions[agent_id],
203 | prev_reward=prev_rewards[agent_id],
204 | policy_id=policy_id)
205 | agent_states[agent_id] = p_state
206 | else:
207 | a_action = agent.compute_action(
208 | a_obs,
209 | prev_action=prev_actions[agent_id],
210 | prev_reward=prev_rewards[agent_id],
211 | policy_id=policy_id)
212 | a_action = _flatten_action(a_action) # tuple actions
213 | action_dict[agent_id] = a_action
214 | prev_actions[agent_id] = a_action
215 | action = action_dict
216 |
217 | action = action if multiagent else action[_DUMMY_AGENT_ID]
218 | next_obs, reward, done, _ = env.step(action)
219 | if multiagent:
220 | for agent_id, r in reward.items():
221 | prev_rewards[agent_id] = r
222 | else:
223 | prev_rewards[_DUMMY_AGENT_ID] = reward
224 |
225 | if multiagent:
226 | done = done["__all__"]
227 | reward_total += sum(reward.values())
228 | else:
229 | reward_total += reward
230 |
231 | if out is not None:
232 | rollout.append([obs, action, next_obs, reward, done])
233 | steps += 1
234 | obs = next_obs
235 | if out is not None:
236 | rollouts.append(rollout)
237 | print("Episode reward", reward_total)
238 | episodes += 1
239 |
240 | if out is not None:
241 | pickle.dump(rollouts, open(out, "wb"))
242 |
243 |
244 | if __name__ == "__main__":
245 | register_env("ROBOTIC_ASSEMBLY", el.env_creator)
246 | parser = create_parser()
247 | args = parser.parse_args()
248 | run(args, parser)
249 |
--------------------------------------------------------------------------------
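
Only the checkpoint path needs to be supplied when rolling out, since run() hard-codes the agent class to DDPG and the environment to ROBOTIC_ASSEMBLY. An illustrative invocation (the checkpoint path is a placeholder following Tune's usual layout under the local_dir set in the YAML) is python rollout.py ~/workspace/ray_results/assembly-apex-ddpg/<trial>/checkpoint_<N>/checkpoint-<N> --episodes 5. Set renders=True in envs_launcher.py to watch the rollout in the pybullet GUI.
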
/run.py:
--------------------------------------------------------------------------------
1 | """
2 | Test simulated envs or collect human demonstration data.
3 | """
4 |
5 | import argparse
6 | import collections
7 | import pickle
8 | import numpy as np
9 |
10 | import envs_launcher
11 | import devices
12 | import utilities
13 |
14 |
15 | def getargs():
16 | parser = argparse.ArgumentParser(
17 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
18 | parser.add_argument(
19 | '--input-type',
20 | type=str,
21 | default='pbg',
22 | help='Input device type; must be equal to any of: ' +
23 | ', '.join(devices.REGISTRY.keys()))
24 | parser.add_argument(
25 | '--input-scaling',
26 | type=tuple,
27 | default=(10, 25),
28 | help='Scaling applied to device inputs; (pos, orn).')
29 | parser.add_argument(
30 | '--action-space',
31 | type=int,
32 | default=6,
33 | help='Degrees of freedom; must be equal to either 3 or 6.')
34 | parser.add_argument(
35 | '--save-demo-data',
36 | type=bool,
37 | default=False,
38 | help='Save demonstration data.')
39 | parser.add_argument(
40 | '--demo-data-path',
41 | type=str,
42 | default='human_demo_data/default',
43 | help='Location to save demonstration data')
44 | args = parser.parse_args()
45 | assert args.input_type in devices.REGISTRY, \
46 | 'arg `input-type` must be equal to any of: ' + ', '.join(devices.REGISTRY.keys())
47 | assert args.action_space == 3 or args.action_space == 6, \
48 | 'arg `action-space` must be equal to either 3 or 6.'
49 | return args
50 |
51 |
52 | def main():
53 | args = getargs()
54 |
55 | environment = envs_launcher.env_creator(args)
56 | obs = environment.reset()
57 | memory = collections.deque()
58 |
59 | device_cls = devices.REGISTRY[args.input_type]
60 | device = device_cls(*args.input_scaling)
61 | device.start()
62 |
63 | done = False
64 | while not done:
65 | device.update()
66 | action = device.pose[:args.action_space]
67 |
68 | new_obs, reward, done, info = environment.step(action)
69 |
70 | if args.save_demo_data and np.count_nonzero(action) > 0: # we don't want to save all-zero actions
71 | memory.append((obs, action, reward, new_obs, done))
72 | obs = new_obs
73 | else:
74 | device.disconnect()
75 |
76 | if args.save_demo_data:
77 | # save all the transitions
78 | file_name = args.demo_data_path
79 | out_file = open(file_name, 'wb')
80 | pickle.dump(memory, out_file)
81 | out_file.close()
82 | utilities.prGreen('Transition saved')
83 | utilities.prGreen('Steps: {}'.format(len(memory)))
84 |
85 |
86 | if __name__ == '__main__':
87 | main()
88 |
--------------------------------------------------------------------------------
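
run.py drives the task directly from an input device (the default 'pbg' presumably maps to the pybullet GUI device in devices/pybulletgui.py) and can record the resulting transitions as human demonstrations, e.g. python run.py --save-demo-data True --demo-data-path human_demo_data/lap. Note that --save-demo-data is declared with type=bool, so any non-empty string (even 'False') enables saving; simply omit the flag to keep it disabled.
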
/train.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import argparse
8 | import yaml
9 |
10 | import ray
11 | from ray.tests.cluster_utils import Cluster
12 | from ray.tune.config_parser import make_parser
13 | from ray.tune.result import DEFAULT_RESULTS_DIR
14 | from ray.tune.resources import resources_to_json
15 | from ray.tune.tune import _make_scheduler, run_experiments
16 |
17 | EXAMPLE_USAGE = """
18 | Training example via RLlib CLI:
19 | rllib train --run DQN --env CartPole-v0
20 |
21 | Grid search example via RLlib CLI:
22 | rllib train -f tuned_examples/cartpole-grid-search-example.yaml
23 |
24 | Grid search example via executable:
25 | ./train.py -f tuned_examples/cartpole-grid-search-example.yaml
26 |
27 | Note that -f overrides all other trial-specific command-line options.
28 | """
29 |
30 |
31 | # additional libraries for dynamic experience replay
32 | from ray.tune.registry import register_env, register_trainable
33 | from ray.rllib.agents.registry import get_agent_class
34 | import os
35 | import random
36 | import envs_launcher
37 | import utilities as util
38 | import shutil
39 | from collections import deque
40 | import pickle
41 |
42 | #========================================
43 | # Callback functions for:
44 | # (1) a custom metric (success rate), and
45 | # (2) saving successful robot demos
46 | #========================================
47 | def on_episode_start(info):
48 | episode = info["episode"]
49 | episode.user_data["success"] = 0
50 |
51 | def on_episode_step(info):
52 | pass
53 |
54 | def on_episode_end(info):
55 | episode = info["episode"]
56 | if len(episode.last_info_for().values()) > 0:
57 | episode.user_data["success"] = list(episode.last_info_for().values())[0]
58 | episode.custom_metrics["successful_rate"] = episode.user_data["success"]
59 |
60 | def on_sample_end(info):
61 | pass
62 |
63 | def on_postprocess_traj(info):
64 | pass
65 | # if list(info["episode"].last_info_for().values())[0] > 0:
66 | # save_episode(info["post_batch"])
67 |
68 | def on_train_result(info):
69 | if "successful_rate_mean" in info["result"]["custom_metrics"]:
70 | info["result"]["successful_rate"] = info["result"]["custom_metrics"]["successful_rate_mean"]
71 |
72 | def save_episode(samples):
73 | memory = deque()
74 | for row in samples.rows():
75 | obs = row["obs"]
76 | action = row["actions"]
77 | reward = row["rewards"]
78 | new_obs = row["new_obs"]
79 | done = row["dones"]
80 | memory.append((obs, action, reward, new_obs, done))
81 | # save transitions
82 | file_name = dir_path + str(random.random())
83 | out_file = open(file_name, 'wb')
84 | pickle.dump(memory, out_file)
85 | out_file.close()
86 |     util.prGreen("Saved a successful episode, {} transitions".format(len(memory)))
87 |
88 | def get_task_path(yaml_file):
89 | with open(yaml_file) as f:
90 | experiments = yaml.safe_load(f)
91 | experiment_name = next(iter(experiments))
92 | dir_path = experiments[experiment_name]["local_dir"]
93 | dir_path = os.path.expanduser(dir_path)
94 | dir_path = os.path.join(dir_path, experiment_name) + "/robot_demos/"
95 | if not os.path.exists(dir_path):
96 | os.makedirs(dir_path)
97 | else:
98 | shutil.rmtree(dir_path)
99 | os.makedirs(dir_path)
100 | return dir_path
101 | #======================================
102 | # End of Callback functions
103 | #======================================
104 |
105 |
106 | def create_parser(parser_creator=None):
107 | parser = make_parser(
108 | parser_creator=parser_creator,
109 | formatter_class=argparse.RawDescriptionHelpFormatter,
110 | description="Train a reinforcement learning agent.",
111 | epilog=EXAMPLE_USAGE)
112 |
113 | # See also the base parser definition in ray/tune/config_parser.py
114 | parser.add_argument(
115 | "--ray-address",
116 | default=None,
117 | type=str,
118 | help="Connect to an existing Ray cluster at this address instead "
119 | "of starting a new one.")
120 | parser.add_argument(
121 | "--ray-num-cpus",
122 | default=None,
123 | type=int,
124 | help="--num-cpus to use if starting a new cluster.")
125 | parser.add_argument(
126 | "--ray-num-gpus",
127 | default=None,
128 | type=int,
129 | help="--num-gpus to use if starting a new cluster.")
130 | parser.add_argument(
131 | "--ray-num-nodes",
132 | default=None,
133 | type=int,
134 | help="Emulate multiple cluster nodes for debugging.")
135 | parser.add_argument(
136 | "--ray-redis-max-memory",
137 | default=None,
138 | type=int,
139 | help="--redis-max-memory to use if starting a new cluster.")
140 | parser.add_argument(
141 | "--ray-memory",
142 | default=None,
143 | type=int,
144 | help="--memory to use if starting a new cluster.")
145 | parser.add_argument(
146 | "--ray-object-store-memory",
147 | default=None,
148 | type=int,
149 | help="--object-store-memory to use if starting a new cluster.")
150 | parser.add_argument(
151 | "--experiment-name",
152 | default="default",
153 | type=str,
154 | help="Name of the subdirectory under `local_dir` to put results in.")
155 | parser.add_argument(
156 | "--local-dir",
157 | default=DEFAULT_RESULTS_DIR,
158 | type=str,
159 | help="Local dir to save training results to. Defaults to '{}'.".format(
160 | DEFAULT_RESULTS_DIR))
161 | parser.add_argument(
162 | "--upload-dir",
163 | default="",
164 | type=str,
165 | help="Optional URI to sync training results to (e.g. s3://bucket).")
166 | parser.add_argument(
167 | "--resume",
168 | action="store_true",
169 | help="Whether to attempt to resume previous Tune experiments.")
170 | parser.add_argument(
171 | "--eager",
172 | action="store_true",
173 | help="Whether to attempt to enable TF eager execution.")
174 | parser.add_argument(
175 | "--trace",
176 | action="store_true",
177 | help="Whether to attempt to enable tracing for eager mode.")
178 | parser.add_argument(
179 | "--env", default=None, type=str, help="The gym environment to use.")
180 | parser.add_argument(
181 | "--queue-trials",
182 | action="store_true",
183 | help=(
184 | "Whether to queue trials when the cluster does not currently have "
185 | "enough resources to launch one. This should be set to True when "
186 | "running on an autoscaling cluster to enable automatic scale-up."))
187 | parser.add_argument(
188 | "-f",
189 | "--config-file",
190 | default=None,
191 | type=str,
192 | help="If specified, use config options from this file. Note that this "
193 | "overrides any trial-specific options set via flags above.")
194 | return parser
195 |
196 | def run(args, parser):
197 | if args.config_file:
198 | with open(args.config_file) as f:
199 | experiments = yaml.safe_load(f)
200 |
201 | # add callbacks for self-defined metric
202 | # and save successful transitions from RL agents
203 | experiment_name = next(iter(experiments))
204 | experiments[experiment_name]["config"]["optimizer"]["robot_demo_path"] = dir_path
205 | experiments[experiment_name]["config"]["callbacks"] = {
206 | "on_episode_start": on_episode_start,
207 | "on_episode_step": on_episode_step,
208 | "on_episode_end": on_episode_end,
209 | "on_sample_end": on_sample_end,
210 | "on_train_result": on_train_result,
211 | "on_postprocess_traj": on_postprocess_traj
212 | }
213 | else:
214 | # Note: keep this in sync with tune/config_parser.py
215 | experiments = {
216 | args.experiment_name: { # i.e. log to ~/ray_results/default
217 | "run": args.run,
218 | "checkpoint_freq": args.checkpoint_freq,
219 | "keep_checkpoints_num": args.keep_checkpoints_num,
220 | "checkpoint_score_attr": args.checkpoint_score_attr,
221 | "local_dir": args.local_dir,
222 | "resources_per_trial": (
223 | args.resources_per_trial and
224 | resources_to_json(args.resources_per_trial)),
225 | "stop": args.stop,
226 | "config": dict(args.config, env=args.env),
227 | "restore": args.restore,
228 | "num_samples": args.num_samples,
229 | "upload_dir": args.upload_dir,
230 | }
231 | }
232 |
233 | for exp in experiments.values():
234 | if not exp.get("run"):
235 | parser.error("the following arguments are required: --run")
236 | if not exp.get("env") and not exp.get("config", {}).get("env"):
237 | parser.error("the following arguments are required: --env")
238 | if args.eager:
239 | exp["config"]["eager"] = True
240 | if args.trace:
241 | if not exp["config"].get("eager"):
242 | raise ValueError("Must enable --eager to enable tracing.")
243 | exp["config"]["eager_tracing"] = True
244 |
245 | if args.ray_num_nodes:
246 | cluster = Cluster()
247 | for _ in range(args.ray_num_nodes):
248 | cluster.add_node(
249 | num_cpus=args.ray_num_cpus or 1,
250 | num_gpus=args.ray_num_gpus or 0,
251 | object_store_memory=args.ray_object_store_memory,
252 | memory=args.ray_memory,
253 | redis_max_memory=args.ray_redis_max_memory)
254 | ray.init(address=cluster.address) #, log_to_driver=False)
255 | else:
256 | ray.init(
257 | address=args.ray_address,
258 | object_store_memory=args.ray_object_store_memory,
259 | memory=args.ray_memory,
260 | redis_max_memory=args.ray_redis_max_memory,
261 | num_cpus=args.ray_num_cpus,
262 | num_gpus=args.ray_num_gpus)
263 | # log_to_driver=False) # disable the loggings
264 | # https://github.com/ray-project/ray/issues/5048
265 |
266 | run_experiments(
267 | experiments,
268 | scheduler=_make_scheduler(args),
269 | queue_trials=args.queue_trials,
270 | resume=args.resume)
271 |
272 | if __name__ == "__main__":
273 | parser = create_parser()
274 | args = parser.parse_args()
275 |
276 | random.seed(12345)
277 |
278 | # register customized envs
279 | register_env("ROBOTIC_ASSEMBLY", envs_launcher.env_creator)
280 |
281 | # register customized algorithms
282 | # register_trainable("APEX_DDPG_DEMO", get_agent_class("contrib/APEX_DDPG_DEMO"))
283 |
284 | # get the path for saving robot demos
285 | dir_path = get_task_path(args.config_file)
286 |
287 | run(args, parser)
288 |
--------------------------------------------------------------------------------
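
For orientation: when a YAML file is passed via -f, run() injects two things before calling run_experiments: the callback functions defined above, which turn the episode's final info value into the successful_rate custom metric, and a robot_demo_path entry under config.optimizer, pointing at the robot_demos/ folder that get_task_path (re)creates inside the experiment's local_dir. The save_episode helper that would write successful agent episodes into that folder is currently disabled, since its call in on_postprocess_traj is commented out.
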
/utilities.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import numpy
3 | import os
4 | import inspect
5 | import transforms3d
6 | import pybullet as p
7 |
8 | # print in color for better readability
9 | def prGreen(skk):
10 | print("\033[92m {}\033[00m" .format(skk))
11 |
12 |
13 | def prRed(skk):
14 | print("\033[91m {}\033[00m" .format(skk))
15 |
16 |
17 | def display_frame_axis(body_uid, link_index, line_length=0.05):
18 | # Red: X axis, Green: Y axis, Blue: Z axis
19 |
20 | p.addUserDebugLine([0, 0, 0], [line_length, 0, 0], [1, 0, 0],
21 | parentObjectUniqueId=body_uid, parentLinkIndex=link_index)
22 | p.addUserDebugLine([0, 0, 0], [0, line_length, 0], [0, 1, 0],
23 | parentObjectUniqueId=body_uid, parentLinkIndex=link_index)
24 | p.addUserDebugLine([0, 0, 0], [0, 0, line_length], [0, 0, 1],
25 | parentObjectUniqueId=body_uid, parentLinkIndex=link_index)
26 |
27 |
28 | def write_csv(data, csv_file, overwrite):
29 |     if os.path.isfile(csv_file) and overwrite:
30 | os.remove(csv_file)
31 | with open(csv_file, 'a') as outfile:
32 | writer = csv.writer(outfile)
33 | writer.writerow(data)
34 |
35 |
36 | def format_urdf_filepath(name):
37 | dot_urdf = '.urdf'
38 | if dot_urdf not in name:
39 | name += dot_urdf
40 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
41 | # print("current_dir=" + currentdir)
42 | os.sys.path.insert(0, currentdir)
43 |
44 | return '{}/{}'.format(currentdir, name)
45 |
46 |
47 | def qinverse(q):
48 |     return wxyz_to_xyzw(transforms3d.quaternions.qinverse(xyzw_to_wxyz(q)))  # transforms3d uses [w, x, y, z] order
49 |
50 |
51 | def xyzw_by_euler(euler, axes):
52 | q = transforms3d.euler.euler2quat(euler[0], euler[1], euler[2], axes) # s for static = extrinsic
53 | return wxyz_to_xyzw(q)
54 |
55 |
56 | def quat_to_euler(xyzw, axes): # axes specifies the type of Euler wanted
57 | return transforms3d.euler.quat2euler(xyzw_to_wxyz(xyzw), axes)
58 |
59 |
60 | def xyzw_to_wxyz(xyzw):
61 | wxyz = [xyzw[3], xyzw[0], xyzw[1], xyzw[2]]
62 | return wxyz
63 |
64 |
65 | def wxyz_to_xyzw(wxyz):
66 | xyzw = [wxyz[1], wxyz[2], wxyz[3], wxyz[0]]
67 | return xyzw
68 |
69 | # Real robot only
70 | def mat33_by_abc(abc):
71 | """
72 |     Get a rotation matrix from Euler Angles.
73 |     :param abc: list, Euler Angles [z, y', x"]
74 |     :return: 3x3 matrix
75 | """
76 | a, b, c = abc
77 | return transforms3d.euler.euler2mat(a, b, c, axes='rzyx') # r for rotating = intrinsic
78 |
79 |
80 | def mat33_by_mat44(mat):
81 | """
82 | Extract a 3x3 matrix from a 4x4 matrix.
83 |     :param mat: 4x4 matrix
84 |     :return: 3x3 matrix
85 | """
86 | m = numpy.eye(3, 3)
87 | for i in range(3):
88 | for j in range(3):
89 | m[i][j] = mat[i][j]
90 | return m
91 |
92 |
93 | def mat33_by_quat(xyzw):
94 | """
95 | Convert quaternion to matrix.
96 | :param xyzw: list, quaternion [x, y, z, w]
97 | :return: 3x3 matrix
98 | """
99 | wxyz = [xyzw[3], xyzw[0], xyzw[1], xyzw[2]]
100 | return transforms3d.quaternions.quat2mat(wxyz)
101 |
102 |
103 | def mat33_to_quat(mat):
104 | """
105 | Convert matrix to quaternion.
106 | :param mat: 3x3 matrix
107 | :return: list, quaternion [x, y, z, w]
108 | """
109 | wxyz = transforms3d.quaternions.mat2quat(mat)
110 | return [wxyz[1], wxyz[2], wxyz[3], wxyz[0]]
111 |
112 |
113 | # Real robot only
114 | def mat33_to_abc(mat):
115 | """
116 | Get Euler Angles from matrix.
117 | :param mat: 3x3 matrix
118 | :return: list, Euler Angles [z, y', x"]
119 | """
120 | return list(transforms3d.euler.mat2euler(mat, axes='rzyx'))
121 |
122 |
123 | def mat44_by_pos_mat33(pos, mat):
124 | """
125 | Construct a matrix using a position and matrix.
126 | :param pos: list, position [x, y, z]
127 | :param mat: 3x3 matrix.
128 | :return: 4x4 matrix
129 | """
130 | m = numpy.eye(4, 4)
131 | for i in range(3):
132 | for j in range(3):
133 | m[i][j] = mat[i][j]
134 | m[i][3] = pos[i]
135 | return m
136 |
137 |
138 | def mat44_by_pos_quat(pos, quat):
139 | """
140 | Construct a matrix using a position and orientation
141 | :param pos: list, position [x, y, z]
142 | :param quat: list, quaternion [x, y, z, w]
143 | :return: 4x4 matrix
144 | """
145 | quat_mat = mat33_by_quat(quat)
146 | return mat44_by_pos_mat33(pos, quat_mat)
147 |
148 |
149 | def mat44_by_pos_abc(pos, abc):
150 | """
151 | Construct a matrix using position and orientation.
152 | :param pos: list, position [x, y, z]
153 | :param abc: list, Euler Angles [z, y', x"]
154 | :return: 4x4 matrix
155 | """
156 | abc_mat = mat33_by_abc(abc)
157 | return mat44_by_pos_mat33(pos, abc_mat)
158 |
159 |
160 | def mat44_to_pos_abc(mat):
161 | """
162 |     Extract position and Euler Angles from a matrix.
163 | :param mat: 4x4 matrix.
164 | :return: list, position [x, y, z], list, Euler Angles [z, y', x"]
165 | """
166 | return mat44_to_pos(mat), mat33_to_abc(mat)
167 |
168 |
169 | def mat44_to_pos_quat(mat):
170 | """
171 |     Extract position and quaternion from a matrix.
172 | :param mat: 4x4 matrix
173 | :return: list, position [x, y, z], list, quaternion [x, y, z, w]
174 | """
175 | pos = []
176 | quat_mat = numpy.eye(3, 3)
177 | for i in range(3):
178 | for j in range(3):
179 | quat_mat[i][j] = mat[i][j]
180 | pos.append(mat[i][3])
181 | quat = mat33_to_quat(quat_mat)
182 | return pos, quat
183 |
184 |
185 | def mat44_to_pos(mat):
186 | """
187 | Get position from matrix.
188 | :param mat: 4x4 matrix
189 | :return: list, position
190 | """
191 | return [mat[i][3] for i in range(3)]
192 |
193 |
194 | def get_relative_xform(mat_from, mat_to):
195 | """
196 |     Get the relative transform between two matrices, i.e. the transform that
197 |     maps mat_from onto mat_to (by left multiplication).
198 | :param mat_from: 4x4 matrix.
199 | :param mat_to: 4x4 matrix
200 | :return: 4x4 matrix.
201 | """
202 | return numpy.matmul(mat_to, numpy.linalg.inv(mat_from))
203 |
204 |
205 | def transform_mat(xform, mat):
206 | """
207 | Transform a matrix by another matrix.
208 |     :param xform: 4x4 matrix applied on the left
209 |     :param mat: 4x4 matrix to transform
210 |     :return: 4x4 matrix
211 | """
212 | return numpy.matmul(xform, mat)
213 |
214 |
215 | def transform_mat_from_to(mat, mat_from, mat_to):
216 | """
217 |     Transform a matrix by the relative transform that maps mat_from onto
218 |     mat_to (see get_relative_xform).
219 |     :param mat: 4x4 matrix to transform
220 |     :param mat_from: 4x4 matrix, source frame
221 |     :param mat_to: 4x4 matrix, target frame
222 |     :return: 4x4 matrix
223 | """
224 | xform = get_relative_xform(mat_from, mat_to)
225 | return transform_mat(xform, mat)
226 |
227 |
228 | def get_f1_to_f2_xform(pose_from, pose_to):
229 | # util.prGreen('base pose: {}'.format(self.pose_from))
230 | # util.prGreen('member pose: {}'.format(self.pose_to))
231 | from_f1_to_f2 = get_relative_xform(mat44_by_pos_quat(pose_from[0], pose_from[1]),
232 | mat44_by_pos_quat(pose_to[0], pose_to[1]))
233 | return from_f1_to_f2
234 |
--------------------------------------------------------------------------------
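
A small usage sketch (not part of the repository) for the pose helpers above; it assumes numpy and transforms3d are installed. Quaternions are handled in pybullet's [x, y, z, w] order at the module boundary and converted to transforms3d's [w, x, y, z] order internally; the example round-trips a pose through a 4x4 homogeneous matrix.

import numpy
import utilities as util

pos = [0.1, 0.2, 0.3]
xyzw = util.xyzw_by_euler([0.0, 0.0, numpy.pi / 2], 'sxyz')  # 90 deg about z, extrinsic axes
mat = util.mat44_by_pos_quat(pos, xyzw)                      # pose -> 4x4 matrix
pos2, xyzw2 = util.mat44_to_pos_quat(mat)                    # 4x4 matrix -> pose
assert numpy.allclose(pos, pos2)
assert numpy.allclose(xyzw, xyzw2)  # same sign here because w > 0
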