├── .gitignore
├── LICENSE
├── README.md
├── assets
│   ├── head.png
│   └── iclr2025_poster.png
├── configs
│   ├── base.yaml
│   ├── calvin_rel_traj_location_bounds_task_ABC_D.json
│   ├── dataset_control_freq.json
│   ├── dataset_img_keys.json
│   ├── dataset_stat.json
│   ├── finetune_datasets.json
│   ├── finetune_sample_weights.json
│   ├── pretrain_datasets.json
│   ├── pretrain_sample_weights.json
│   ├── state_vec.py
│   └── zero2.json
├── data
│   ├── .gitignore
│   ├── agilex
│   │   └── hdf5totfrecords.py
│   ├── aloha
│   │   ├── hdf5totfrecords.py
│   │   └── unzip_data.sh
│   ├── bridgev2
│   │   ├── bridgedata_numpy_to_tfrecord.py
│   │   ├── bridgedata_raw_to_numpy.py
│   │   └── download.sh
│   ├── calvin
│   │   ├── download.sh
│   │   └── hdf5totfrecords.py
│   ├── compute_dataset_stat.py
│   ├── compute_dataset_stat_hdf5.py
│   ├── empty_lang_embed.pt
│   ├── episode_transform.py
│   ├── filelock.py
│   ├── hdf5_maniskill_dataset.py
│   ├── hdf5_vla_dataset.py
│   ├── openx_embod
│   │   └── download.sh
│   ├── preprocess.py
│   ├── preprocess_scripts
│   │   ├── __init__.py
│   │   ├── agilex.py
│   │   ├── aloha_mobile.py
│   │   ├── aloha_static.py
│   │   ├── asu_table_top_converted_externally_to_rlds.py
│   │   ├── austin_buds_dataset_converted_externally_to_rlds.py
│   │   ├── austin_sailor_dataset_converted_externally_to_rlds.py
│   │   ├── austin_sirius_dataset_converted_externally_to_rlds.py
│   │   ├── bc_z.py
│   │   ├── berkeley_autolab_ur5.py
│   │   ├── berkeley_cable_routing.py
│   │   ├── berkeley_fanuc_manipulation.py
│   │   ├── berkeley_gnm_cory_hall.py
│   │   ├── berkeley_gnm_recon.py
│   │   ├── berkeley_gnm_sac_son.py
│   │   ├── berkeley_mvp_converted_externally_to_rlds.py
│   │   ├── berkeley_rpt_converted_externally_to_rlds.py
│   │   ├── bridge.py
│   │   ├── bridgev2.py
│   │   ├── calvin.py
│   │   ├── cmu_franka_exploration_dataset_converted_externally_to_rlds.py
│   │   ├── cmu_play_fusion.py
│   │   ├── cmu_stretch.py
│   │   ├── columbia_cairlab_pusht_real.py
│   │   ├── dlr_edan_shared_control_converted_externally_to_rlds.py
│   │   ├── dlr_sara_grid_clamp_converted_externally_to_rlds.py
│   │   ├── dlr_sara_pour_converted_externally_to_rlds.py
│   │   ├── dobbe.py
│   │   ├── droid.py
│   │   ├── eth_agent_affordances.py
│   │   ├── fmb.py
│   │   ├── fractal20220817_data.py
│   │   ├── furniture_bench_dataset_converted_externally_to_rlds.py
│   │   ├── iamlab_cmu_pickup_insert_converted_externally_to_rlds.py
│   │   ├── imperialcollege_sawyer_wrist_cam.py
│   │   ├── jaco_play.py
│   │   ├── kaist_nonprehensile_converted_externally_to_rlds.py
│   │   ├── kuka.py
│   │   ├── language_table.py
│   │   ├── maniskill_dataset_converted_externally_to_rlds.py
│   │   ├── nyu_door_opening_surprising_effectiveness.py
│   │   ├── nyu_franka_play_dataset_converted_externally_to_rlds.py
│   │   ├── nyu_rot_dataset_converted_externally_to_rlds.py
│   │   ├── qut_dexterous_manpulation.py
│   │   ├── rh20t.py
│   │   ├── robo_net.py
│   │   ├── robomimic_can_ph.py
│   │   ├── robomimic_lift_ph.py
│   │   ├── robomimic_square_ph.py
│   │   ├── robomimic_tool_hang_ph.py
│   │   ├── robomimic_transport_ph.py
│   │   ├── roboset.py
│   │   ├── roboturk.py
│   │   ├── roboturk_real_laundrylayout.py
│   │   ├── roboturk_real_objectsearch.py
│   │   ├── roboturk_real_towercreation.py
│   │   ├── stanford_hydra_dataset_converted_externally_to_rlds.py
│   │   ├── stanford_kuka_multimodal_dataset_converted_externally_to_rlds.py
│   │   ├── stanford_mask_vit_converted_externally_to_rlds.py
│   │   ├── stanford_robocook_converted_externally_to_rlds.py
│   │   ├── taco_play.py
│   │   ├── tokyo_u_lsmo_converted_externally_to_rlds.py
│   │   ├── toto.py
│   │   ├── ucsd_kitchen_dataset_converted_externally_to_rlds.py
│   │   ├── ucsd_pick_and_place_dataset_converted_externally_to_rlds.py
│   │   ├── uiuc_d3field.py
│   │   ├── usc_cloth_sim_converted_externally_to_rlds.py
│   │   ├── utaustin_mutex.py
│   │   ├── utokyo_pr2_opening_fridge_converted_externally_to_rlds.py
│   │   ├── utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds.py
│   │   ├── utokyo_xarm_bimanual_converted_externally_to_rlds.py
│   │   ├── utokyo_xarm_pick_and_place_converted_externally_to_rlds.py
│   │   └── viola.py
│   ├── producer.py
│   ├── rh20t
│   │   └── hdf5totfrecords.py
│   ├── roboset
│   │   ├── download.py
│   │   ├── download.sh
│   │   ├── h5totfrecords.py
│   │   └── links.txt
│   ├── utils.py
│   └── vla_dataset.py
├── docs
│   ├── pretrain.md
│   └── test_6drot.py
├── eval_sim
│   ├── eval_dp.py
│   ├── eval_octo.py
│   ├── eval_openvla.py
│   └── eval_rdt_maniskill.py
├── finetune.sh
├── finetune_maniskill.sh
├── inference.sh
├── main.py
├── models
│   ├── ema_model.py
│   ├── hub_mixin.py
│   ├── multimodal_encoder
│   │   ├── clip_encoder.py
│   │   ├── dinov2_encoder.py
│   │   ├── siglip_encoder.py
│   │   └── t5_encoder.py
│   ├── rdt
│   │   ├── blocks.py
│   │   └── model.py
│   └── rdt_runner.py
├── pretrain.sh
├── requirements.txt
├── requirements_data.txt
├── scripts
│   ├── agilex_inference.py
│   ├── agilex_model.py
│   ├── encode_lang.py
│   ├── encode_lang_batch.py
│   └── maniskill_model.py
└── train
    ├── dataset.py
    ├── image_corrupt.py
    ├── sample.py
    └── train.py

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------

MIT License

Copyright (c) 2024 TSAIL group

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/assets/head.png:
--------------------------------------------------------------------------------

https://raw.githubusercontent.com/thu-ml/RoboticsDiffusionTransformer/9af5241cb4456836ddf852b5a0286441f7b5d1d6/assets/head.png

--------------------------------------------------------------------------------
/assets/iclr2025_poster.png:
--------------------------------------------------------------------------------

https://raw.githubusercontent.com/thu-ml/RoboticsDiffusionTransformer/9af5241cb4456836ddf852b5a0286441f7b5d1d6/assets/iclr2025_poster.png

--------------------------------------------------------------------------------
/configs/base.yaml:
--------------------------------------------------------------------------------

common:
  # The number of historical images
  img_history_size: 2
  # The number of future actions to predict
  action_chunk_size: 64
  # The number of cameras to be used in the model
  num_cameras: 3
  # Dimension for state/action; we use the same space for both state and action
  # This MUST be equal to the dimension defined in configs/state_vec.py
  state_dim: 128


dataset:
  # We extract the data from the raw dataset
  # and store it in the disk buffer via the producer.
  # When training, the consumer reads the data
  # randomly from the buffer.
  # The producer replaces data that has already been
  # read by the consumer with new data.

  # The path to the buffer (at least 400GB)
  buf_path: /path/to/buffer
  # The number of chunks in the buffer
  buf_num_chunks: 512
  # The number of samples (steps rather than episodes) in each chunk
  buf_chunk_size: 512

  # We filter out episodes with length less than `epsd_len_thresh_low`
  epsd_len_thresh_low: 32
  # For episodes longer than `epsd_len_thresh_high`,
  # we randomly sample `epsd_len_thresh_high` steps each time we load the episode
  # to better balance the training datasets
  epsd_len_thresh_high: 2048
  # How to fit the image size
  image_aspect_ratio: pad
  # Maximum number of language tokens
  tokenizer_max_length: 1024

model:
  # Config for condition adaptors
  lang_adaptor: mlp2x_gelu
  img_adaptor: mlp2x_gelu
  state_adaptor: mlp3x_gelu
  lang_token_dim: 4096
  img_token_dim: 1152
  # Dim of action or proprioception vector
  # A `state` refers to an action or a proprioception vector
  state_token_dim: 128
  # Config for RDT structure
  rdt:
    # 1B: num_head 32 hidden_size 2048
    hidden_size: 2048
    depth: 28
    num_heads: 32
    cond_pos_embed_type: multimodal
  # For noise scheduler
  noise_scheduler:
    type: ddpm
    num_train_timesteps: 1000
    num_inference_timesteps: 5
    beta_schedule: squaredcos_cap_v2 # Critical choice
    prediction_type: sample
    clip_sample: False
  # For EMA (params averaging)
  # We do not use EMA currently
  ema:
    update_after_step: 0
    inv_gamma: 1.0
    power: 0.75
    min_value: 0.0
    max_value: 0.9999

--------------------------------------------------------------------------------
/configs/calvin_rel_traj_location_bounds_task_ABC_D.json:
--------------------------------------------------------------------------------

{
    "A": [
        [
            -0.2691913843154907,
            -0.21995729207992554,
            -0.182277649641037
        ],
        [
            0.35127854347229004,
            0.2769763469696045,
            0.17159393429756165
        ]
    ],
    "B": [
        [
            -0.2576896846294403,
            -0.22244493663311005,
            -0.20557966828346252
        ],
        [
            0.32854634523391724,
            0.2922680974006653,
            0.17373555898666382
        ]
    ],
    "C": [
        [
            -0.29205888509750366,
            -0.24688798189163208,
            -0.17577645182609558
        ],
        [
            0.25053921341896057,
            0.3277084231376648,
            0.16431939601898193
        ]
    ],
    "D": [
        [
            -0.25131964683532715,
            -0.15233077108860016,
            -0.13294968008995056
        ],
        [
            0.19209328293800354,
            0.19344553351402283,
            0.1370421051979065
        ]
    ]
}

--------------------------------------------------------------------------------
/configs/dataset_control_freq.json:
--------------------------------------------------------------------------------

{
    "fractal20220817_data": 3,
    "taco_play": 15,
    "jaco_play": 10,
    "berkeley_cable_routing": 10,
    "nyu_door_opening_surprising_effectiveness": 3,
    "viola": 20,
    "berkeley_autolab_ur5": 5,
    "toto": 30,
    "kuka": 10,
    "language_table": 10,
    "columbia_cairlab_pusht_real": 10,
    "stanford_kuka_multimodal_dataset_converted_externally_to_rlds": 20,
    "nyu_rot_dataset_converted_externally_to_rlds": 3,
    "stanford_hydra_dataset_converted_externally_to_rlds": 10,
    "austin_buds_dataset_converted_externally_to_rlds": 20,
    "nyu_franka_play_dataset_converted_externally_to_rlds": 3,
    "maniskill_dataset_converted_externally_to_rlds": 20,
    "furniture_bench_dataset_converted_externally_to_rlds": 10,
    "ucsd_kitchen_dataset_converted_externally_to_rlds": 2,
    "ucsd_pick_and_place_dataset_converted_externally_to_rlds": 3,
    "austin_sailor_dataset_converted_externally_to_rlds": 20,
    "austin_sirius_dataset_converted_externally_to_rlds": 20,
    "bc_z": 10,
    "utokyo_pr2_opening_fridge_converted_externally_to_rlds": 10,
    "utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds": 10,
    "utokyo_xarm_pick_and_place_converted_externally_to_rlds": 10,
    "utokyo_xarm_bimanual_converted_externally_to_rlds": 10,
    "berkeley_mvp_converted_externally_to_rlds": 5,
    "berkeley_rpt_converted_externally_to_rlds": 30,
    "kaist_nonprehensile_converted_externally_to_rlds": 10,
    "stanford_mask_vit_converted_externally_to_rlds": 0,
    "tokyo_u_lsmo_converted_externally_to_rlds": 10,
    "dlr_sara_pour_converted_externally_to_rlds": 10,
    "dlr_sara_grid_clamp_converted_externally_to_rlds": 10,
    "dlr_edan_shared_control_converted_externally_to_rlds": 5,
    "asu_table_top_converted_externally_to_rlds": 12.5,
    "stanford_robocook_converted_externally_to_rlds": 5,
    "eth_agent_affordances": 66.6,
    "imperialcollege_sawyer_wrist_cam": 10,
    "iamlab_cmu_pickup_insert_converted_externally_to_rlds": 20,
    "uiuc_d3field": 1,
    "utaustin_mutex": 20,
    "berkeley_fanuc_manipulation": 10,
    "cmu_play_fusion": 5,
    "cmu_stretch": 10,
    "berkeley_gnm_recon": 3,
    "berkeley_gnm_cory_hall": 5,
    "berkeley_gnm_sac_son": 10,
    "robo_net": 1,
    "roboturk_real_towercreation": 10,
    "roboturk_real_laundrylayout": 10,
    "roboturk_real_objectsearch": 10,
    "aloha_mobile": 50,
    "aloha_static": 50,
    "roboset": 5,
    "droid": 15,
    "fmb": 10,
    "dobbe": 30,
    "qut_dexterous_manpulation": 30,
    "agilex": 25,
    "rh20t": 10,
    "calvin": 30,
    "bridgev2": 5
}
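The `noise_scheduler` block in configs/base.yaml above maps naturally onto a diffusion scheduler. Below is a minimal sketch, assuming the `diffusers` library is used to realize the `type: ddpm` choice; the actual wiring lives in models/rdt_runner.py and may differ in detail.

# Sketch: building a DDPM scheduler from the noise_scheduler block in
# configs/base.yaml. Field names follow the YAML; the repo's own construction
# in models/rdt_runner.py may differ.
import yaml
from diffusers import DDPMScheduler

with open("configs/base.yaml", "r") as f:
    config = yaml.safe_load(f)

ns = config["model"]["noise_scheduler"]
scheduler = DDPMScheduler(
    num_train_timesteps=ns["num_train_timesteps"],  # 1000
    beta_schedule=ns["beta_schedule"],              # squaredcos_cap_v2
    prediction_type=ns["prediction_type"],          # "sample": predict x_0 directly
    clip_sample=ns["clip_sample"],                  # False
)
# At inference time, only num_inference_timesteps (5) denoising steps are run:
scheduler.set_timesteps(ns["num_inference_timesteps"])

Predicting the clean sample (`prediction_type: sample`) rather than the noise is what makes such a small number of inference steps workable.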
--------------------------------------------------------------------------------
/configs/finetune_datasets.json:
--------------------------------------------------------------------------------

[
    "agilex"
]

--------------------------------------------------------------------------------
/configs/finetune_sample_weights.json:
--------------------------------------------------------------------------------

{
    "agilex": 100
}

--------------------------------------------------------------------------------
/configs/pretrain_datasets.json:
--------------------------------------------------------------------------------

[
    "fractal20220817_data",
    "jaco_play",
    "taco_play",
    "berkeley_cable_routing",
    "viola",
    "berkeley_autolab_ur5",
    "toto",
    "nyu_door_opening_surprising_effectiveness",
    "columbia_cairlab_pusht_real",
    "stanford_kuka_multimodal_dataset_converted_externally_to_rlds",
    "austin_buds_dataset_converted_externally_to_rlds",
    "kuka",
    "utokyo_xarm_bimanual_converted_externally_to_rlds",
    "stanford_hydra_dataset_converted_externally_to_rlds",
    "maniskill_dataset_converted_externally_to_rlds",
    "ucsd_kitchen_dataset_converted_externally_to_rlds",
    "ucsd_pick_and_place_dataset_converted_externally_to_rlds",
    "austin_sailor_dataset_converted_externally_to_rlds",
    "austin_sirius_dataset_converted_externally_to_rlds",
    "bc_z",
    "utokyo_pr2_opening_fridge_converted_externally_to_rlds",
    "utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds",
    "utokyo_xarm_pick_and_place_converted_externally_to_rlds",
    "berkeley_mvp_converted_externally_to_rlds",
    "berkeley_rpt_converted_externally_to_rlds",
    "kaist_nonprehensile_converted_externally_to_rlds",
    "tokyo_u_lsmo_converted_externally_to_rlds",
    "dlr_sara_grid_clamp_converted_externally_to_rlds",
    "stanford_robocook_converted_externally_to_rlds",
    "imperialcollege_sawyer_wrist_cam",
    "iamlab_cmu_pickup_insert_converted_externally_to_rlds",
    "utaustin_mutex",
    "berkeley_fanuc_manipulation",
    "cmu_play_fusion",
    "language_table",
    "furniture_bench_dataset_converted_externally_to_rlds",
    "droid",
    "fmb",
    "dobbe",
    "qut_dexterous_manpulation",
    "aloha_mobile",
    "aloha_static",
    "roboset",
    "rh20t",
    "calvin",
    "bridgev2"
]

--------------------------------------------------------------------------------
/configs/pretrain_sample_weights.json:
--------------------------------------------------------------------------------

{
    "fractal20220817_data": 271,
    "taco_play": 60,
    "jaco_play": 33,
    "berkeley_cable_routing": 8,
    "nyu_door_opening_surprising_effectiveness": 10,
    "viola": 12,
    "berkeley_autolab_ur5": 32,
    "toto": 32,
    "kuka": 50,
    "language_table": 100,
    "columbia_cairlab_pusht_real": 12,
    "stanford_kuka_multimodal_dataset_converted_externally_to_rlds": 55,
    "stanford_hydra_dataset_converted_externally_to_rlds": 24,
    "austin_buds_dataset_converted_externally_to_rlds": 7,
    "maniskill_dataset_converted_externally_to_rlds": 174,
    "furniture_bench_dataset_converted_externally_to_rlds": 71,
    "ucsd_kitchen_dataset_converted_externally_to_rlds": 12,
    "ucsd_pick_and_place_dataset_converted_externally_to_rlds": 37,
    "austin_sailor_dataset_converted_externally_to_rlds": 15,
    "austin_sirius_dataset_converted_externally_to_rlds": 24,
    "bc_z": 208,
    "utokyo_pr2_opening_fridge_converted_externally_to_rlds": 9,
    "utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds": 15,
    "utokyo_xarm_pick_and_place_converted_externally_to_rlds": 10,
    "utokyo_xarm_bimanual_converted_externally_to_rlds": 1,
    "berkeley_mvp_converted_externally_to_rlds": 22,
    "berkeley_rpt_converted_externally_to_rlds": 30,
    "kaist_nonprehensile_converted_externally_to_rlds": 14,
    "tokyo_u_lsmo_converted_externally_to_rlds": 7,
    "dlr_sara_grid_clamp_converted_externally_to_rlds": 1,
    "stanford_robocook_converted_externally_to_rlds": 50,
    "imperialcollege_sawyer_wrist_cam": 13,
    "iamlab_cmu_pickup_insert_converted_externally_to_rlds": 25,
    "utaustin_mutex": 39,
    "berkeley_fanuc_manipulation": 20,
    "cmu_play_fusion": 24,
    "droid": 303,
    "fmb": 42,
    "dobbe": 36,
    "qut_dexterous_manpulation": 14,
    "aloha_mobile": 150,
    "aloha_static": 150,
    "roboset": 135,
    "rh20t": 331,
    "calvin": 100,
    "bridgev2": 224
}
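A larger weight means a dataset is drawn proportionally more often during pre-training. A minimal sketch of how these two config files could drive dataset selection; the repo's actual producer/consumer sampling lives in data/producer.py and data/vla_dataset.py and may differ:

# Sketch: weighted dataset selection from the two JSON configs above.
import json
import random

with open("configs/pretrain_datasets.json") as f:
    names = json.load(f)
with open("configs/pretrain_sample_weights.json") as f:
    weights = json.load(f)

# Draw one dataset name, proportional to its weight.
dataset_name = random.choices(
    names,
    weights=[weights[n] for n in names],
    k=1,
)[0]
print(dataset_name)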
"utokyo_pr2_opening_fridge_converted_externally_to_rlds": 9, 24 | "utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds": 15, 25 | "utokyo_xarm_pick_and_place_converted_externally_to_rlds": 10, 26 | "utokyo_xarm_bimanual_converted_externally_to_rlds": 1, 27 | "berkeley_mvp_converted_externally_to_rlds": 22, 28 | "berkeley_rpt_converted_externally_to_rlds": 30, 29 | "kaist_nonprehensile_converted_externally_to_rlds": 14, 30 | "tokyo_u_lsmo_converted_externally_to_rlds": 7, 31 | "dlr_sara_grid_clamp_converted_externally_to_rlds": 1, 32 | "stanford_robocook_converted_externally_to_rlds": 50, 33 | "imperialcollege_sawyer_wrist_cam": 13, 34 | "iamlab_cmu_pickup_insert_converted_externally_to_rlds": 25, 35 | "utaustin_mutex": 39, 36 | "berkeley_fanuc_manipulation": 20, 37 | "cmu_play_fusion": 24, 38 | "droid": 303, 39 | "fmb": 42, 40 | "dobbe": 36, 41 | "qut_dexterous_manpulation": 14, 42 | "aloha_mobile": 150, 43 | "aloha_static": 150, 44 | "roboset": 135, 45 | "rh20t": 331, 46 | "calvin": 100, 47 | "bridgev2": 224 48 | } -------------------------------------------------------------------------------- /configs/zero2.json: -------------------------------------------------------------------------------- 1 | { 2 | "bf16": { 3 | "enabled": "auto" 4 | }, 5 | "train_micro_batch_size_per_gpu": "auto", 6 | "train_batch_size": "auto", 7 | "gradient_accumulation_steps": "auto", 8 | "zero_optimization": { 9 | "stage": 2, 10 | "overlap_comm": true, 11 | "contiguous_gradients": true, 12 | "sub_group_size": 1e9 13 | } 14 | } -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore data files 2 | datasets 3 | -------------------------------------------------------------------------------- /data/aloha/unzip_data.sh: -------------------------------------------------------------------------------- 1 | cd ../datasets/aloha/ 2 | 3 | unzip aloha_mobile.zip -------------------------------------------------------------------------------- /data/bridgev2/download.sh: -------------------------------------------------------------------------------- 1 | # Download the dataset to ../datasets/bridgev2 2 | mkdir -p ../datasets/bridgev2 3 | wget -O ../datasets/bridgev2/demos_8_17.zip https://rail.eecs.berkeley.edu/datasets/bridge_release/data/demos_8_17.zip 4 | mkdir -p ../datasets/bridgev2/raw 5 | # Unzip the dataset 6 | unzip '../datasets/bridgev2/*.zip' -d ../datasets/bridgev2/raw 7 | # Convert the dataset to numpy 8 | python bridgedata_raw_to_numpy.py --input ../datasets/bridgev2/raw --output ../datasets/bridgev2/npy 9 | # Convert the dataset to tfrecords 10 | python bridgedata_numpy_to_tfrecord.py --input ../datasets/bridgev2/npy --output ../datasets/bridgev2/tfrecords 11 | # Remove the raw data and numpy data 12 | rm -rf ../datasets/bridgev2/raw 13 | rm -rf ../datasets/bridgev2/npy -------------------------------------------------------------------------------- /data/calvin/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Downloading CALVIN dataset..." 
--------------------------------------------------------------------------------
/data/.gitignore:
--------------------------------------------------------------------------------

# Ignore data files
datasets

--------------------------------------------------------------------------------
/data/aloha/unzip_data.sh:
--------------------------------------------------------------------------------

cd ../datasets/aloha/

unzip aloha_mobile.zip

--------------------------------------------------------------------------------
/data/bridgev2/download.sh:
--------------------------------------------------------------------------------

# Download the dataset to ../datasets/bridgev2
mkdir -p ../datasets/bridgev2
wget -O ../datasets/bridgev2/demos_8_17.zip https://rail.eecs.berkeley.edu/datasets/bridge_release/data/demos_8_17.zip
mkdir -p ../datasets/bridgev2/raw
# Unzip the dataset
unzip '../datasets/bridgev2/*.zip' -d ../datasets/bridgev2/raw
# Convert the dataset to numpy
python bridgedata_raw_to_numpy.py --input ../datasets/bridgev2/raw --output ../datasets/bridgev2/npy
# Convert the dataset to tfrecords
python bridgedata_numpy_to_tfrecord.py --input ../datasets/bridgev2/npy --output ../datasets/bridgev2/tfrecords
# Remove the raw data and numpy data
rm -rf ../datasets/bridgev2/raw
rm -rf ../datasets/bridgev2/npy

--------------------------------------------------------------------------------
/data/calvin/download.sh:
--------------------------------------------------------------------------------

#!/bin/bash

echo "Downloading CALVIN dataset..."

# Create calvin folder in ../datasets/calvin/
mkdir -p ../datasets/calvin/

cd ../datasets/calvin/

# You can use this for faster downloading
# aria2c -x 16 -s 16 http://calvin.cs.uni-freiburg.de/dataset/task_ABC_D.zip

wget http://calvin.cs.uni-freiburg.de/dataset/task_ABC_D.zip

echo "Unzipping CALVIN dataset..."

unzip task_ABC_D.zip

echo "Done downloading and unzipping CALVIN dataset."

--------------------------------------------------------------------------------
/data/calvin/hdf5totfrecords.py:
--------------------------------------------------------------------------------

import tensorflow as tf
import os
import numpy as np
from tqdm import tqdm


def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy()  # BytesList won't unpack a string from an EagerTensor.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def _bool_feature(value):
    """Returns a bool_list from a boolean."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[int(value)]))


def serialize_example(action, robot_obs, rgb_static, rgb_gripper, instruction, terminate_episode):
    # Feature for fixed-length fields
    feature = {
        'action': _bytes_feature(tf.io.serialize_tensor(action)),
        'robot_obs': _bytes_feature(tf.io.serialize_tensor(robot_obs)),
        'rgb_static': _bytes_feature(tf.io.serialize_tensor(rgb_static)),
        'rgb_gripper': _bytes_feature(tf.io.serialize_tensor(rgb_gripper)),
        'terminate_episode': _bool_feature(terminate_episode),
        'instruction': _bytes_feature(instruction),
    }

    example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
    return example_proto.SerializeToString()


def write_tfrecords(root_dir, out_dir):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Get the language annotations and their corresponding episode indices
    f = np.load(os.path.join(root_dir, "lang_annotations/auto_lang_ann.npy"), allow_pickle=True)
    lang = f.item()['language']['ann']
    lang = np.array([x.encode('utf-8') for x in lang])
    lang_start_end_idx = f.item()['info']['indx']
    num_ep = len(lang_start_end_idx)

    with tqdm(total=num_ep) as pbar:
        for episode_idx, (start_idx, end_idx) in enumerate(lang_start_end_idx):
            pbar.update(1)

            step_files = [
                f"episode_{str(i).zfill(7)}.npz"
                for i in range(start_idx, end_idx + 1)
            ]
            action = []
            robot_obs = []
            rgb_static = []
            rgb_gripper = []
            instr = lang[episode_idx]
            for step_file in step_files:
                filepath = os.path.join(root_dir, step_file)
                f = np.load(filepath)
                # Get the relevant fields
                action.append(f['actions'])
                robot_obs.append(f['robot_obs'])
                rgb_static.append(f['rgb_static'])
                rgb_gripper.append(f['rgb_gripper'])

            tfrecord_path = os.path.join(out_dir, f'{episode_idx:07d}.tfrecord')
            print(f"Writing TFRecords to {tfrecord_path}")
            with tf.io.TFRecordWriter(tfrecord_path) as writer:
                for i in range(len(step_files)):
                    serialized_example = serialize_example(
                        action[i], robot_obs[i], rgb_static[i], rgb_gripper[i], instr, i == len(step_files) - 1
                    )
                    writer.write(serialized_example)

output_dirs = [
    '../datasets/calvin/tfrecords/training',
    '../datasets/calvin/tfrecords/validation'
]

for output_dir in output_dirs:
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

root_dirs = [
    '../datasets/calvin/task_ABC_D/training',
    '../datasets/calvin/task_ABC_D/validation'
]

for root_dir, output_dir in zip(root_dirs, output_dirs):
    print(f"Writing TFRecords to {output_dir}")
    write_tfrecords(root_dir, output_dir)
--------------------------------------------------------------------------------
/data/compute_dataset_stat_hdf5.py:
--------------------------------------------------------------------------------

"""
This file will compute the min, max, mean, and standard deviation of each dataset
in `pretrain_datasets.json` or `finetune_datasets.json`.
"""

import json
import argparse

import numpy as np
from tqdm import tqdm

from data.hdf5_vla_dataset import HDF5VLADataset


def process_hdf5_dataset(vla_dataset):
    EPS = 1e-8
    episode_cnt = 0
    state_sum = 0
    state_sum_sq = 0
    z_state_sum = 0
    z_state_sum_sq = 0
    state_cnt = 0
    nz_state_cnt = None
    state_max = None
    state_min = None
    for i in tqdm(range(len(vla_dataset))):
        episode = vla_dataset.get_item(i, state_only=True)
        episode_cnt += 1

        states = episode['state']

        # Zero the values that are close to zero
        z_states = states.copy()
        z_states[np.abs(states) <= EPS] = 0
        # Compute the non-zero count
        if nz_state_cnt is None:
            nz_state_cnt = np.zeros(states.shape[1])
        nz_state_cnt += np.sum(np.abs(states) > EPS, axis=0)

        # Update statistics
        state_sum += np.sum(states, axis=0)
        state_sum_sq += np.sum(states**2, axis=0)
        z_state_sum += np.sum(z_states, axis=0)
        z_state_sum_sq += np.sum(z_states**2, axis=0)
        state_cnt += states.shape[0]
        if state_max is None:
            state_max = np.max(states, axis=0)
            state_min = np.min(states, axis=0)
        else:
            state_max = np.maximum(state_max, np.max(states, axis=0))
            state_min = np.minimum(state_min, np.min(states, axis=0))

    # Add one to avoid division by zero
    nz_state_cnt = np.maximum(nz_state_cnt, np.ones_like(nz_state_cnt))

    result = {
        "dataset_name": vla_dataset.get_dataset_name(),
        "state_mean": (state_sum / state_cnt).tolist(),
        "state_std": np.sqrt(
            np.maximum(
                (z_state_sum_sq / nz_state_cnt) - (z_state_sum / state_cnt)**2 * (state_cnt / nz_state_cnt),
                np.zeros_like(state_sum_sq)
            )
        ).tolist(),
        "state_min": state_min.tolist(),
        "state_max": state_max.tolist(),
    }

    return result


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--save_path', type=str,
                        default="configs/dataset_stat.json",
                        help="JSON file path to save the dataset statistics.")
    parser.add_argument('--skip_exist', action='store_true',
                        help="Whether to skip the existing dataset statistics.")
    args = parser.parse_args()

    vla_dataset = HDF5VLADataset()
    dataset_name = vla_dataset.get_dataset_name()

    try:
        with open(args.save_path, 'r') as f:
            results = json.load(f)
    except FileNotFoundError:
        results = {}
    if args.skip_exist and dataset_name in results:
        print(f"Skipping existing {dataset_name} dataset statistics")
    else:
        print(f"Processing {dataset_name} dataset")
        result = process_hdf5_dataset(vla_dataset)
        results[result["dataset_name"]] = result
        with open(args.save_path, 'w') as f:
            json.dump(results, f, indent=4)
    print("All datasets have been processed.")
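Note on the std computation above: states are zero-padded into the unified 128-dim vector space (see `state_dim` in configs/base.yaml), so the standard deviation is taken only over non-zero entries; otherwise the padding zeros would bias every per-dimension std toward zero. A toy numpy check of the same formula:

# Sketch: verifying the nonzero-count variance formula used above.
import numpy as np

EPS = 1e-8
states = np.array([[0.0, 1.0],
                   [0.0, 3.0]])                   # dim 0 is pure padding
cnt = states.shape[0]
nz = np.maximum((np.abs(states) > EPS).sum(axis=0), 1)
mean = states.sum(axis=0) / cnt
var = (states ** 2).sum(axis=0) / nz - mean ** 2 * (cnt / nz)
print(np.sqrt(np.maximum(var, 0)))                # [0. 1.] -- padding dim stays 0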
--------------------------------------------------------------------------------
/data/empty_lang_embed.pt:
--------------------------------------------------------------------------------

https://raw.githubusercontent.com/thu-ml/RoboticsDiffusionTransformer/9af5241cb4456836ddf852b5a0286441f7b5d1d6/data/empty_lang_embed.pt

--------------------------------------------------------------------------------
/data/filelock.py:
--------------------------------------------------------------------------------

import fcntl


class FileLock:
    """
    A file lock class.
    """
    def __init__(self, filename):
        self.filename = filename
        self.handle = None

    def acquire_read_lock(self):
        self.handle = open(self.filename + '.lock', 'r')
        fcntl.flock(self.handle, fcntl.LOCK_SH | fcntl.LOCK_NB)

    def acquire_write_lock(self):
        self.handle = open(self.filename + '.lock', 'w')
        fcntl.flock(self.handle, fcntl.LOCK_EX | fcntl.LOCK_NB)

    def release_lock(self):
        if self.handle is not None:
            fcntl.flock(self.handle, fcntl.LOCK_UN)
            self.handle.close()
            self.handle = None
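Both acquire methods use `LOCK_NB`, so `flock` raises `BlockingIOError` when the lock is already held rather than blocking. A sketch of typical non-blocking use; the buffer-chunk path is a hypothetical example:

# Sketch: non-blocking use of FileLock (creates/opens <path>.lock).
from data.filelock import FileLock

lock = FileLock("/path/to/buffer/chunk_0")   # hypothetical chunk path
try:
    lock.acquire_write_lock()                # raises BlockingIOError if held
    # ... exclusively rewrite the chunk ...
except BlockingIOError:
    pass                                     # lock held elsewhere: skip or retry
finally:
    lock.release_lock()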
--------------------------------------------------------------------------------
/data/openx_embod/download.sh:
--------------------------------------------------------------------------------

#!/bin/bash

# Define the list of dataset names
DATASETS=(
    'fractal20220817_data'
    'kuka'
    'bridge'
    'taco_play'
    'jaco_play'
    'berkeley_cable_routing'
    'roboturk'
    'nyu_door_opening_surprising_effectiveness'
    'viola'
    'berkeley_autolab_ur5'
    'toto'
    'language_table'
    'columbia_cairlab_pusht_real'
    'stanford_kuka_multimodal_dataset_converted_externally_to_rlds'
    'nyu_rot_dataset_converted_externally_to_rlds'
    'stanford_hydra_dataset_converted_externally_to_rlds'
    'austin_buds_dataset_converted_externally_to_rlds'
    'nyu_franka_play_dataset_converted_externally_to_rlds'
    'maniskill_dataset_converted_externally_to_rlds'
    'furniture_bench_dataset_converted_externally_to_rlds'
    'cmu_franka_exploration_dataset_converted_externally_to_rlds'
    'ucsd_kitchen_dataset_converted_externally_to_rlds'
    'ucsd_pick_and_place_dataset_converted_externally_to_rlds'
    'austin_sailor_dataset_converted_externally_to_rlds'
    'austin_sirius_dataset_converted_externally_to_rlds'
    'bc_z'
    'usc_cloth_sim_converted_externally_to_rlds'
    'utokyo_pr2_opening_fridge_converted_externally_to_rlds'
    'utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds'
    'utokyo_saytap_converted_externally_to_rlds'
    'utokyo_xarm_pick_and_place_converted_externally_to_rlds'
    'utokyo_xarm_bimanual_converted_externally_to_rlds'
    'robo_net'
    'berkeley_mvp_converted_externally_to_rlds'
    'berkeley_rpt_converted_externally_to_rlds'
    'kaist_nonprehensile_converted_externally_to_rlds'
    'stanford_mask_vit_converted_externally_to_rlds'
    'tokyo_u_lsmo_converted_externally_to_rlds'
    'dlr_sara_pour_converted_externally_to_rlds'
    'dlr_sara_grid_clamp_converted_externally_to_rlds'
    'dlr_edan_shared_control_converted_externally_to_rlds'
    'asu_table_top_converted_externally_to_rlds'
    'stanford_robocook_converted_externally_to_rlds'
    'eth_agent_affordances'
    'imperialcollege_sawyer_wrist_cam'
    'iamlab_cmu_pickup_insert_converted_externally_to_rlds'
    'uiuc_d3field'
    'utaustin_mutex'
    'berkeley_fanuc_manipulation'
    'cmu_playing_with_food'
    'cmu_play_fusion'
    'cmu_stretch'
    'berkeley_gnm_recon'
    'berkeley_gnm_cory_hall'
    'berkeley_gnm_sac_son'
    # Additional datasets
    'droid'
    'fmb'
    'dobbe'
)


# Iterate over each dataset name
for dataset_name in "${DATASETS[@]}"; do
    echo "Downloading $dataset_name"

    # Execute gsutil command
    ~/miniconda3/envs/rdt-data/bin/gsutil -m cp -n -r -D "gs://gresearch/robotics/$dataset_name" ../datasets/openx_embod/

    # Check if the resulting directory exists
    directory_path="../datasets/openx_embod/$dataset_name"
    if [ ! -d "$directory_path" ]; then
        # If the directory does not exist, then print an error message
        echo "Failed to download $dataset_name"
    else
        # If the directory exists, then print a success message
        echo "Successfully downloaded $dataset_name"
    fi
done
--------------------------------------------------------------------------------
/data/preprocess_scripts/__init__.py:
--------------------------------------------------------------------------------

from . import fractal20220817_data
from . import bridge
from . import jaco_play
from . import nyu_door_opening_surprising_effectiveness
from . import taco_play
from . import berkeley_cable_routing
from . import roboturk
from . import viola
from . import berkeley_autolab_ur5
from . import toto
from . import columbia_cairlab_pusht_real
from . import stanford_kuka_multimodal_dataset_converted_externally_to_rlds
from . import nyu_rot_dataset_converted_externally_to_rlds
from . import austin_buds_dataset_converted_externally_to_rlds
from . import nyu_franka_play_dataset_converted_externally_to_rlds
from . import cmu_franka_exploration_dataset_converted_externally_to_rlds
from . import kuka
from . import utokyo_xarm_bimanual_converted_externally_to_rlds
from . import maniskill_dataset_converted_externally_to_rlds
from . import stanford_hydra_dataset_converted_externally_to_rlds
from . import ucsd_kitchen_dataset_converted_externally_to_rlds
from . import ucsd_pick_and_place_dataset_converted_externally_to_rlds
from . import austin_sailor_dataset_converted_externally_to_rlds
from . import austin_sirius_dataset_converted_externally_to_rlds
from . import bc_z
from . import usc_cloth_sim_converted_externally_to_rlds
from . import utokyo_pr2_opening_fridge_converted_externally_to_rlds
from . import utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds
from . import utokyo_xarm_pick_and_place_converted_externally_to_rlds
from . import berkeley_mvp_converted_externally_to_rlds
from . import berkeley_rpt_converted_externally_to_rlds
from . import kaist_nonprehensile_converted_externally_to_rlds
from . import stanford_mask_vit_converted_externally_to_rlds
from . import tokyo_u_lsmo_converted_externally_to_rlds
from . import dlr_sara_pour_converted_externally_to_rlds
from . import dlr_sara_grid_clamp_converted_externally_to_rlds
from . import dlr_edan_shared_control_converted_externally_to_rlds
from . import asu_table_top_converted_externally_to_rlds
from . import stanford_robocook_converted_externally_to_rlds
from . import roboturk_real_laundrylayout
from . import roboturk_real_towercreation
from . import roboturk_real_objectsearch
from . import robomimic_lift_ph
from . import robomimic_can_ph
from . import robomimic_square_ph
from . import robomimic_transport_ph
from . import robomimic_tool_hang_ph
from . import eth_agent_affordances
from . import imperialcollege_sawyer_wrist_cam
from . import iamlab_cmu_pickup_insert_converted_externally_to_rlds
from . import uiuc_d3field
from . import utaustin_mutex
from . import berkeley_fanuc_manipulation
from . import cmu_play_fusion
from . import cmu_stretch
from . import berkeley_gnm_recon
from . import berkeley_gnm_cory_hall
from . import berkeley_gnm_sac_son
from . import language_table
from . import furniture_bench_dataset_converted_externally_to_rlds
from . import robo_net
from . import bridgev2
from . import aloha_mobile
from . import aloha_static
from . import droid
from . import fmb
from . import dobbe
from . import qut_dexterous_manpulation
from . import roboset
from . import agilex
from . import rh20t
from . import calvin
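Each submodule exposes a `process_step` with the same signature, so the package doubles as a registry keyed by dataset name. A sketch of how data/preprocess.py presumably dispatches to these modules; the actual lookup in the repo may differ:

# Sketch: name-based dispatch over the preprocess_scripts package.
from data import preprocess_scripts

def get_process_step(dataset_name: str):
    """Return the process_step function for a dataset, e.g. 'bc_z'."""
    module = getattr(preprocess_scripts, dataset_name)
    return module.process_step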
--------------------------------------------------------------------------------
/data/preprocess_scripts/asu_table_top_converted_externally_to_rlds.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, quaternion_to_euler


def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
    """
    Convert terminate action to a boolean, where True means terminate.
    """
    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    action = step['action']
    # Robot action, consists of [7x joint velocities, 2x gripper velocities, 1x terminate episode].
    # NOTE: the actual dimension of the action is 7, so only the 7x joint velocities exist
    joint_vel = action[:7]
    gripper_vel = action[7:9]
    # There is extra action_delta information:
    # Robot delta action, consists of [7x joint velocities, 2x gripper velocities, 1x terminate episode].
    # action_delta = step['action_delta']

    # Concatenate the action
    step['action'] = {}
    action = step['action']
    action['arm_concat'] = tf.concat([joint_vel, gripper_vel], axis=0)
    action['terminate'] = step['is_terminal']

    # Write the action format
    action['format'] = tf.constant(
        "arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_vel,gripper_joint_1_vel")

    # Convert raw state to our state
    state = step['observation']
    state_vec = state['state']
    # Robot state, consists of [6x robot joint angles, 1x gripper position].
    arm_joint_ang = state_vec[:6]
    grip_pos = state_vec[6:7]
    # Robot joint velocity, consists of [6x robot joint angles, 1x gripper position].
    state_vel = state['state_vel']
    arm_joint_vel = state_vel[:6]
    grip_vel = state_vel[6:7]

    # Concatenate the state
    state['arm_concat'] = tf.concat([arm_joint_ang, arm_joint_vel, grip_pos, grip_vel], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,gripper_joint_0_pos,gripper_joint_0_vel")

    # Clean the task instruction
    # Define the replacements (old, new) as a dictionary
    replacements = {
        '_': ' ',
        '1f': ' ',
        '4f': ' ',
        '-': ' ',
        '50': ' ',
        '55': ' ',
        '56': ' ',
    }
    instr = step['language_instruction']
    instr = clean_task_instruction(instr, replacements)
    step['observation']['natural_language_instruction'] = instr

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod'
    DATASET_NAME = 'asu_table_top_converted_externally_to_rlds'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
--------------------------------------------------------------------------------
/data/preprocess_scripts/austin_buds_dataset_converted_externally_to_rlds.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, euler_to_quaternion, rotation_matrix_to_ortho6d


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    origin_action = step['action']
    step['action'] = {}
    action = step['action']
    action['terminate'] = step['is_terminal']

    eef_delta_pos = origin_action[:3]
    eef_ang = origin_action[3:6]
    eef_ang = euler_to_quaternion(eef_ang)
    # gripper_open: -1 = open, 1 = closed
    grip_open = tf.where(tf.equal(origin_action[6:], tf.constant(-1.0)), tf.constant(1.0), tf.constant(0.0))

    # No base found

    # Concatenate the action
    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)

    # Write the action format
    action['format'] = tf.constant(
        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")

    # Convert raw state to our state
    state = step['observation']
    # Concatenate the state
    eef_mat = tf.transpose(tf.reshape(state['state'][8:], (4, 4)))
    eef_pos = eef_mat[:3, 3]
    rotation_matrix = eef_mat[:3, :3]
    eef_ang = rotation_matrix_to_ortho6d(rotation_matrix)
    joint_pos = state['state'][:7]
    grip_open = state['state'][7:8] * 12.5  # rescale to [0, 1]
    state['arm_concat'] = tf.concat([joint_pos, grip_open, eef_pos, eef_ang], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")

    # Clean the task instruction
    # Define the replacements (old, new) as a dictionary
    replacements = {
        '_': ' ',
        '1f': ' ',
        '4f': ' ',
        '-': ' ',
        '50': ' ',
        '55': ' ',
        '56': ' ',
    }
    instr = step['language_instruction']
    instr = clean_task_instruction(instr, replacements)
    step['observation']['natural_language_instruction'] = instr

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod'
    DATASET_NAME = 'austin_buds_dataset_converted_externally_to_rlds'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
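Several of these scripts convert end-effector rotations into the continuous 6D representation via `rotation_matrix_to_ortho6d` from data/utils.py. A sketch of that representation (Zhou et al., CVPR 2019), assuming the common convention of keeping the first two columns of the rotation matrix; the repo's exact row/column layout may differ (docs/test_6drot.py exercises the real implementation):

# Sketch: the 6D rotation representation and its Gram-Schmidt inverse.
import tensorflow as tf

def rotation_matrix_to_ortho6d_sketch(rot: tf.Tensor) -> tf.Tensor:
    """(3, 3) rotation matrix -> (6,) continuous representation."""
    return tf.reshape(tf.transpose(rot[:, :2]), [-1])  # first two basis columns

def ortho6d_to_rotation_matrix_sketch(d6: tf.Tensor) -> tf.Tensor:
    """(6,) -> (3, 3), re-orthonormalized via Gram-Schmidt."""
    a1, a2 = d6[:3], d6[3:]
    b1 = tf.math.l2_normalize(a1)
    b2 = tf.math.l2_normalize(a2 - tf.reduce_sum(b1 * a2) * b1)
    b3 = tf.linalg.cross(b1, b2)
    return tf.stack([b1, b2, b3], axis=1)               # columns b1, b2, b3

Unlike quaternions or Euler angles, this mapping is continuous over the rotation group, which tends to make regression targets better behaved.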
--------------------------------------------------------------------------------
/data/preprocess_scripts/austin_sailor_dataset_converted_externally_to_rlds.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, euler_to_quaternion, \
    quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d


def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
    """
    Convert terminate action to a boolean, where True means terminate.
    """
    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    # Robot action, consists of [3x ee relative pos, 3x ee relative rotation, 1x gripper action].
    action = step['action']
    eef_delta_pos = action[:3]
    eef_ang = action[3:6]
    eef_ang = euler_to_quaternion(eef_ang)
    grip_open = tf.expand_dims(1 - action[6], axis=0)

    # Concatenate the action
    step['action'] = {}
    action = step['action']
    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
    action['terminate'] = step['is_terminal']

    # Write the action format
    action['format'] = tf.constant(
        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")

    # Convert raw state to our state
    state = step['observation']['state']
    joint_pos = step['observation']['state_joint']
    # Default robot state, consists of [3x robot ee pos, 3x ee quat, 1x gripper state].
    eef_pos = state[:3]
    eef_quat = state[3:7]
    eef_ang = quaternion_to_rotation_matrix(eef_quat)
    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
    gripper_joint_pos = state[7:8] * 12.85  # rescale to [0, 1]

    # Concatenate the state
    state = step['observation']
    state['arm_concat'] = tf.concat([joint_pos, gripper_joint_pos, eef_pos, eef_ang], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")

    # Clean the task instruction
    # Define the replacements (old, new) as a dictionary
    replacements = {
        '_': ' ',
        '1f': ' ',
        '4f': ' ',
        '-': ' ',
        '50': ' ',
        '55': ' ',
        '56': ' ',
    }
    instr = step['language_instruction']
    instr = clean_task_instruction(instr, replacements)
    step['observation']['natural_language_instruction'] = instr

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod'
    DATASET_NAME = 'austin_sailor_dataset_converted_externally_to_rlds'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
--------------------------------------------------------------------------------
/data/preprocess_scripts/austin_sirius_dataset_converted_externally_to_rlds.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d


def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
    """
    Convert terminate action to a boolean, where True means terminate.
    """
    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    # Robot action, consists of [3x ee relative pos, 3x ee relative rotation, 1x gripper action].
    action = step['action']
    eef_pos = action[:3]
    eef_ang = action[3:6]
    eef_ang = euler_to_rotation_matrix(eef_ang)
    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
    grip_open = tf.expand_dims(1 - action[6], axis=0)

    # Concatenate the action
    step['action'] = {}
    action = step['action']

    action['arm_concat'] = tf.concat([eef_pos, eef_ang, grip_open], axis=0)
    action['terminate'] = step['is_terminal']

    # Write the action format
    action['format'] = tf.constant(
        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")

    # Convert raw state to our state
    joint_pos = step['observation']['state'][:7]
    grip_open = step['observation']['state'][7:8] * 12.55
    state_ee = step['observation']['state_ee']
    # Tensor (16,): end-effector state, represented as a 4x4 homogeneous transformation matrix of the ee pose.
    transform_matrix = tf.transpose(tf.reshape(state_ee, (4, 4)))
    eef_pos = transform_matrix[:3, 3]
    rotation_matrix = transform_matrix[:3, :3]
    eef_ang = rotation_matrix_to_ortho6d(rotation_matrix)

    # Concatenate the state
    state = step['observation']
    state['arm_concat'] = tf.concat([joint_pos, grip_open, eef_pos, eef_ang], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")

    # Clean the task instruction
    # Define the replacements (old, new) as a dictionary
    replacements = {
        '_': ' ',
        '1f': ' ',
        '4f': ' ',
        '-': ' ',
        '50': ' ',
        '55': ' ',
        '56': ' ',
    }
    instr = step['language_instruction']
    instr = clean_task_instruction(instr, replacements)
    step['observation']['natural_language_instruction'] = instr

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod'
    DATASET_NAME = 'austin_sirius_dataset_converted_externally_to_rlds'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
--------------------------------------------------------------------------------
/data/preprocess_scripts/bc_z.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, euler_to_quaternion, \
    euler_to_rotation_matrix, rotation_matrix_to_ortho6d


def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
    """
    Convert terminate action to a boolean, where True means terminate.
    """
    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    action = step['action']
    # The next 10 actions for the positions. Each action is a 3D delta to add to the current position.
    eef_delta_pos = action['future/xyz_residual'][:3]
    # The next 10 actions for the rotation. Each action is a 3D delta to add to the current axis angle.
    eef_ang = action['future/axis_angle_residual'][:3]
    eef_ang = euler_to_quaternion(eef_ang)
    # The next 10 actions for the gripper. Each action is the value the gripper closure should be changed to (notably it is not a delta.)
    grip_open = tf.cast(tf.expand_dims(1 - action['future/target_close'][0], axis=0), dtype=tf.float32)

    # Concatenate the action
    step['action'] = {}
    action = step['action']

    action['terminate'] = step['is_terminal']
    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)

    # Write the action format
    action['format'] = tf.constant(
        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")

    # Convert raw state to our state
    state = step['observation']

    gripper_ang = state['present/axis_angle']
    gripper_ang = euler_to_rotation_matrix(gripper_ang)
    gripper_ang = rotation_matrix_to_ortho6d(gripper_ang)
    gripper_pos = state['present/xyz']
    # How much the gripper is currently closed. Scaled from 0 to 1, but not all values from 0 to 1 are reachable. The range in the data is about 0.2 to 1
    gripper_open = 1 - state['present/sensed_close']

    # Concatenate the state
    state = step['observation']
    state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")

    # Clean the task instruction
    # Define the replacements (old, new) as a dictionary
    replacements = {
        '_': ' ',
        '1f': ' ',
        '4f': ' ',
        '-': ' ',
        '50': ' ',
        '55': ' ',
        '56': ' ',
    }
    instr = step['observation']['natural_language_instruction']
    instr = clean_task_instruction(instr, replacements)
    step['observation']['natural_language_instruction'] = instr

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod'
    DATASET_NAME = 'bc_z'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
--------------------------------------------------------------------------------
/data/preprocess_scripts/berkeley_autolab_ur5.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, euler_to_quaternion, \
    quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d


def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
    """
    Convert terminate action to a boolean, where True means terminate.
    """
    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)), tf.constant(False), tf.constant(True))


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    action = step['action']
    action['terminate'] = terminate_act_to_bool(action['terminate_episode'])
    eef_delta_pos = action['world_vector']
    eef_ang = action['rotation_delta']
    eef_ang = euler_to_quaternion(eef_ang)

    # Ignore action['gripper_open']: 1 if close gripper, -1 if open gripper, 0 if no change.

    # No base found

    # Concatenate the action
    arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0)
    action['arm_concat'] = arm_action

    # Write the action format
    action['format'] = tf.constant(
        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w")

    # Convert raw state to our state
    state = step['observation']
    # state['robot_state']: [joint0, joint1, joint2, joint3, joint4, joint5, x,y,z, qx,qy,qz,qw, gripper_is_closed, action_blocked]
    robot_state = state['robot_state']
    joint_pos = robot_state[:6]
    eef_pos = robot_state[6:9]
    eef_quat = robot_state[9:13]
    eef_ang = quaternion_to_rotation_matrix(eef_quat)
    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
    # gripper_is_closed is binary: 0 = fully open; 1 = fully closed
    grip_closed = robot_state[13:14]
    grip_open = 1 - grip_closed
    # action_blocked is binary: 0 = not blocked; 1 = blocked
    # action_blocked = robot_state[14:15]

    # Concatenate the state
    state['arm_concat'] = tf.concat([joint_pos, grip_open, eef_pos, eef_ang], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")

    # Clean the task instruction
    # Define the replacements (old, new) as a dictionary
    replacements = {
        '_': ' ',
        '1f': ' ',
        '4f': ' ',
        '-': ' ',
        '50': ' ',
        '55': ' ',
        '56': ' ',
    }
    instr = step['observation']['natural_language_instruction']
    instr = clean_task_instruction(instr, replacements)
    step['observation']['natural_language_instruction'] = instr

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod'
    DATASET_NAME = 'berkeley_autolab_ur5'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
--------------------------------------------------------------------------------
/data/preprocess_scripts/berkeley_cable_routing.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d


def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
    """
    Convert terminate action to a boolean, where True means terminate.
    """
    return tf.equal(terminate_act, tf.constant(1.0, dtype=tf.float32))


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    action = step['action']
    action['terminate'] = terminate_act_to_bool(action['terminate_episode'])

    eef_delta_pos = action['world_vector']
    eef_ang = action['rotation_delta']

    # No gripper_open found
    # No base found

    # Concatenate the action
    arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0)
    action['arm_concat'] = arm_action
    # base_action = tf.concat([base_pos, base_ang], axis=0)
    # action['base_concat'] = base_action

    # Write the action format
    action['format'] = tf.constant(
        "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw")

    # Convert raw state to our state
    state = step['observation']
    eef_pos = state['robot_state'][:3]
    eef_ang = quaternion_to_rotation_matrix(state['robot_state'][3:])
    eef_ang = rotation_matrix_to_ortho6d(eef_ang)

    # Concatenate the state
    state['arm_concat'] = tf.concat([eef_pos, eef_ang], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")

    # Define the task instruction
    step['observation']['natural_language_instruction'] = tf.constant(
        "Route cable through the tight-fitting clip mounted on the table.")

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod/'
    DATASET_NAME = 'berkeley_cable_routing'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
11 | """ 12 | # Convert raw action to our action 13 | 14 | origin_action = step['action'] 15 | step['action']={} 16 | action=step['action'] 17 | action['terminate'] = step['is_terminal'] 18 | # 6x end effector delta pose, 1x gripper position 19 | eef_delta_pos = origin_action[:3] 20 | eef_ang=origin_action[3:6] 21 | eef_ang = euler_to_quaternion(eef_ang) 22 | # No base found 23 | 24 | # Concatenate the action 25 | action['arm_concat'] = tf.concat([eef_delta_pos,eef_ang],axis=0) 26 | 27 | # Write the action format 28 | action['format'] = tf.constant( 29 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") 30 | 31 | # Convert raw state to our state 32 | state = step['observation'] 33 | # Concatenate the state 34 | # [6x robot joint angles, 1x gripper open status, 6x robot joint velocities]. 35 | arm_joint_ang=state['state'][:6] 36 | grip_open=1-state['state'][6:7] 37 | # arm_joint_vel=state['state'][7:13] # all zeros 38 | eef_pos = state['end_effector_state'][:3] 39 | eef_ang = quaternion_to_rotation_matrix(state['end_effector_state'][3:]) 40 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 41 | state['arm_concat'] = tf.concat([arm_joint_ang,grip_open,eef_pos,eef_ang],axis=0) 42 | 43 | # Write the state format 44 | state['format'] = tf.constant( 45 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 46 | 47 | # Clean the task instruction 48 | # Define the replacements (old, new) as a dictionary 49 | replacements = { 50 | '_': ' ', 51 | '1f': ' ', 52 | '4f': ' ', 53 | '-': ' ', 54 | '50': ' ', 55 | '55': ' ', 56 | '56': ' ', 57 | 58 | } 59 | instr = step['language_instruction'] 60 | instr = clean_task_instruction(instr, replacements) 61 | step['observation']['natural_language_instruction'] = instr 62 | 63 | return step 64 | 65 | 66 | if __name__ == "__main__": 67 | import tensorflow_datasets as tfds 68 | from data.utils import dataset_to_path 69 | 70 | DATASET_DIR = 'data/datasets/openx_embod' 71 | DATASET_NAME = 'berkeley_fanuc_manipulation' 72 | # Load the dataset 73 | dataset = tfds.builder_from_directory( 74 | builder_dir=dataset_to_path( 75 | DATASET_NAME, DATASET_DIR)) 76 | dataset = dataset.as_dataset(split='all') 77 | 78 | # Inspect the dataset 79 | for episode in dataset: 80 | for step in episode['steps']: 81 | print(step) 82 | -------------------------------------------------------------------------------- /data/preprocess_scripts/berkeley_gnm_cory_hall.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ 4 | rotation_matrix_to_ortho6d 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
11 | """ 12 | # Convert raw action to our action 13 | 14 | origin_action = step['action'] 15 | step['action']={} 16 | action=step['action'] 17 | action['terminate'] = step['is_terminal'] 18 | 19 | eef_pos=tf.cast(origin_action,dtype=tf.float32) 20 | eef_ang=tf.cast(step['action_angle'][2:3],dtype=tf.float32) 21 | eef_ang = euler_to_quaternion(tf.stack([0,0,eef_ang[0]],axis=0)) 22 | # No base found 23 | 24 | # Concatenate the action 25 | action['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) 26 | 27 | # Write the action format 28 | action['format'] = tf.constant( 29 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") 30 | 31 | # Convert raw state to our state 32 | state = step['observation'] 33 | # Concatenate the state 34 | eef_pos=tf.cast(state['position'],dtype=tf.float32) 35 | eef_ang=tf.cast(state['yaw'],dtype=tf.float32) 36 | eef_ang = euler_to_rotation_matrix(tf.stack([0,0,eef_ang[0]],axis=0)) 37 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 38 | state['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) 39 | # Write the state format 40 | state['format'] = tf.constant( 41 | "eef_pos_x,eef_pos_y,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 42 | 43 | # Clean the task instruction 44 | # Define the replacements (old, new) as a dictionary 45 | replacements = { 46 | '_': ' ', 47 | '1f': ' ', 48 | '4f': ' ', 49 | '-': ' ', 50 | '50': ' ', 51 | '55': ' ', 52 | '56': ' ', 53 | 54 | } 55 | instr = step['language_instruction'] 56 | instr = clean_task_instruction(instr, replacements) 57 | step['observation']['natural_language_instruction'] = instr 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'berkeley_gnm_cory_hall' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset: 76 | for step in episode['steps']: 77 | print(step['action'][6:7]) 78 | 79 | -------------------------------------------------------------------------------- /data/preprocess_scripts/berkeley_gnm_recon.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ 4 | rotation_matrix_to_ortho6d 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
11 | """ 12 | # Convert raw action to our action 13 | 14 | origin_action = step['action'] 15 | step['action']={} 16 | action=step['action'] 17 | action['terminate'] = step['is_terminal'] 18 | 19 | eef_pos=tf.cast(origin_action,dtype=tf.float32) 20 | eef_ang=tf.cast(step['action_angle'][2:3],dtype=tf.float32) 21 | eef_ang = euler_to_quaternion(tf.stack([0,0,eef_ang[0]],axis=0)) 22 | # No base found 23 | 24 | # Concatenate the action 25 | action['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) 26 | 27 | # Write the action format 28 | action['format'] = tf.constant( 29 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") 30 | 31 | # Convert raw state to our state 32 | state = step['observation'] 33 | # Concatenate the state 34 | eef_pos=tf.cast(state['position'],dtype=tf.float32) 35 | eef_ang=tf.cast(state['yaw'],dtype=tf.float32) 36 | eef_ang = euler_to_rotation_matrix(tf.stack([0,0,eef_ang[0]],axis=0)) 37 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 38 | state['arm_concat'] = tf.concat([eef_pos/100,eef_ang],axis=0) 39 | # Write the state format 40 | state['format'] = tf.constant( 41 | "eef_pos_x,eef_pos_y,eef_angle_x,eef_angle_y,eef_angle_z,eef_angle_w") 42 | 43 | # Clean the task instruction 44 | # Define the replacements (old, new) as a dictionary 45 | replacements = { 46 | '_': ' ', 47 | '1f': ' ', 48 | '4f': ' ', 49 | '-': ' ', 50 | '50': ' ', 51 | '55': ' ', 52 | '56': ' ', 53 | 54 | } 55 | instr = step['language_instruction'] 56 | instr = clean_task_instruction(instr, replacements) 57 | step['observation']['natural_language_instruction'] = instr 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'berkeley_gnm_recon' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset: 76 | for step in episode['steps']: 77 | print(step['action'][6:7]) 78 | 79 | -------------------------------------------------------------------------------- /data/preprocess_scripts/berkeley_gnm_sac_son.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
11 | """ 12 | # Convert raw action to our action 13 | 14 | origin_action = step['action'] 15 | step['action']={} 16 | action=step['action'] 17 | action['terminate'] = step['is_terminal'] 18 | 19 | eef_pos=tf.cast(origin_action, dtype=tf.float32) 20 | eef_ang=tf.cast(step['action_angle'][2:3], dtype=tf.float32) 21 | eef_ang = euler_to_quaternion(tf.stack([0,0,eef_ang[0]], axis=0)) 22 | # No base found 23 | 24 | # Concatenate the action 25 | action['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) 26 | 27 | # Write the action format 28 | action['format'] = tf.constant( 29 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") 30 | 31 | # Convert raw state to our state 32 | state = step['observation'] 33 | # Concatenate the state 34 | eef_pos=tf.cast(state['position'],dtype=tf.float32) 35 | eef_ang=tf.cast(state['yaw'],dtype=tf.float32) 36 | eef_ang = euler_to_rotation_matrix(tf.stack([0,0,eef_ang[0]],axis=0)) 37 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 38 | state['arm_concat'] = tf.concat([eef_pos/100,eef_ang],axis=0) 39 | # Write the state format 40 | state['format'] = tf.constant( 41 | "eef_pos_x,eef_pos_y,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 42 | 43 | # Clean the task instruction 44 | # Define the replacements (old, new) as a dictionary 45 | replacements = { 46 | '_': ' ', 47 | '1f': ' ', 48 | '4f': ' ', 49 | '-': ' ', 50 | '50': ' ', 51 | '55': ' ', 52 | '56': ' ', 53 | 54 | } 55 | instr = step['language_instruction'] 56 | instr = clean_task_instruction(instr, replacements) 57 | step['observation']['natural_language_instruction'] = instr 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'berkeley_gnm_sac_son' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset: 76 | for step in episode['steps']: 77 | print(step['action'][6:7]) 78 | 79 | -------------------------------------------------------------------------------- /data/preprocess_scripts/berkeley_mvp_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | # Robot action, consists of [7 delta joint pos,1x gripper binary state]. 
23 | delta_joint_pos = action[:7] 24 | grip_open = tf.expand_dims(1 - action[7], axis=0) 25 | 26 | # Concatenate the action 27 | # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 28 | step['action'] = {} 29 | action = step['action'] 30 | action['arm_concat'] = tf.concat([delta_joint_pos, grip_open], axis=0) 31 | action['terminate'] = step['is_terminal'] 32 | 33 | # Write the action format 34 | action['format'] = tf.constant( 35 | "arm_joint_0_delta_pos,arm_joint_1_delta_pos,arm_joint_2_delta_pos,arm_joint_3_delta_pos,arm_joint_4_delta_pos,arm_joint_5_delta_pos,arm_joint_6_delta_pos,gripper_open") 36 | 37 | # Convert raw state to our state 38 | state = step['observation'] 39 | # xArm joint positions (7 DoF). 40 | arm_joint_pos = state['joint_pos'] 41 | # Binary gripper state (1 - closed, 0 - open) 42 | grip_open = tf.expand_dims(1 - tf.cast(state['gripper'],dtype=tf.float32), axis=0) 43 | # Gripper pose, robot frame, [3 position, 4 rotation] 44 | gripper_pose = state['pose'][:3] 45 | # gripper_ang = quaternion_to_euler(state['pose'][3:7]) 46 | gripper_ang = quaternion_to_rotation_matrix(state['pose'][3:7]) 47 | gripper_ang = rotation_matrix_to_ortho6d(gripper_ang) 48 | 49 | # Concatenate the state 50 | state['arm_concat'] = tf.concat([arm_joint_pos, gripper_pose,gripper_ang, grip_open], axis=0) 51 | 52 | # Write the state format 53 | state['format'] = tf.constant( 54 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 55 | 56 | # Clean the task instruction 57 | # Define the replacements (old, new) as a dictionary 58 | replacements = { 59 | '_': ' ', 60 | '1f': ' ', 61 | '4f': ' ', 62 | '-': ' ', 63 | '50': ' ', 64 | '55': ' ', 65 | '56': ' ', 66 | 67 | } 68 | instr = step['language_instruction'] 69 | instr = clean_task_instruction(instr, replacements) 70 | step['observation']['natural_language_instruction'] = instr 71 | 72 | return step 73 | 74 | 75 | if __name__ == "__main__": 76 | import tensorflow_datasets as tfds 77 | from data.utils import dataset_to_path 78 | 79 | DATASET_DIR = 'data/datasets/openx_embod' 80 | DATASET_NAME = 'berkeley_mvp_converted_externally_to_rlds' 81 | # Load the dataset 82 | dataset = tfds.builder_from_directory( 83 | builder_dir=dataset_to_path( 84 | DATASET_NAME, DATASET_DIR)) 85 | dataset = dataset.as_dataset(split='all') 86 | 87 | # Inspect the dataset 88 | for episode in dataset: 89 | for step in episode['steps']: 90 | print(step) 91 | -------------------------------------------------------------------------------- /data/preprocess_scripts/berkeley_rpt_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 18 | """ 19 | # Convert raw action to our action 20 | action = step['action'] 21 | # Robot action, consists of [7 delta joint pos,1x gripper binary state].
22 | delta_joint_pos = action[:7] 23 | grip_open = tf.expand_dims(1 - action[7], axis=0) 24 | 25 | # Concatenate the action 26 | # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 27 | step['action'] = {} 28 | action = step['action'] 29 | action['arm_concat'] = tf.concat([delta_joint_pos, grip_open], axis=0) 30 | action['terminate'] = step['is_terminal'] 31 | 32 | # Write the action format 33 | action['format'] = tf.constant( 34 | "arm_joint_0_delta_pos,arm_joint_1_delta_pos,arm_joint_2_delta_pos,arm_joint_3_delta_pos,arm_joint_4_delta_pos,arm_joint_5_delta_pos,arm_joint_6_delta_pos,gripper_open") 35 | 36 | # Convert raw state to our state 37 | state = step['observation'] 38 | # xArm joint positions (7 DoF). 39 | arm_joint_pos = state['joint_pos'] 40 | # Binary gripper state (1 - closed, 0 - open) 41 | grip_open = tf.expand_dims(1 - tf.cast(state['gripper'],dtype=tf.float32), axis=0) 42 | 43 | # Concatenate the state 44 | state['arm_concat'] = tf.concat([arm_joint_pos, grip_open], axis=0) 45 | 46 | # Write the state format 47 | state['format'] = tf.constant( 48 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open") 49 | 50 | # Clean the task instruction 51 | # Define the replacements (old, new) as a dictionary 52 | replacements = { 53 | '_': ' ', 54 | '1f': ' ', 55 | '4f': ' ', 56 | '-': ' ', 57 | '50': ' ', 58 | '55': ' ', 59 | '56': ' ', 60 | 61 | } 62 | instr = step['language_instruction'] 63 | instr = clean_task_instruction(instr, replacements) 64 | step['observation']['natural_language_instruction'] = instr 65 | 66 | return step 67 | 68 | 69 | if __name__ == "__main__": 70 | import tensorflow_datasets as tfds 71 | from data.utils import dataset_to_path 72 | 73 | DATASET_DIR = 'data/datasets/openx_embod' 74 | DATASET_NAME = 'berkeley_rpt_converted_externally_to_rlds' 75 | # Load the dataset 76 | dataset = tfds.builder_from_directory( 77 | builder_dir=dataset_to_path( 78 | DATASET_NAME, DATASET_DIR)) 79 | dataset = dataset.as_dataset(split='all') 80 | 81 | # Inspect the dataset 82 | for episode in dataset: 83 | for step in episode['steps']: 84 | print(step) 85 | -------------------------------------------------------------------------------- /data/preprocess_scripts/bridge.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.equal(terminate_act, tf.constant(1.0, dtype=tf.float32)) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead.
19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 23 | 24 | eef_delta_pos = action['world_vector'] 25 | eef_ang=action['rotation_delta'] 26 | eef_ang = euler_to_quaternion(eef_ang) 27 | grip_open=tf.reshape(tf.where(action['open_gripper'],1.0, 0.0),(1,)) 28 | # grip_open:tensor 29 | 30 | # No base found 31 | 32 | # Concatenate the action 33 | arm_action=tf.concat([eef_delta_pos,eef_ang,grip_open],axis=0) 34 | action['arm_concat']=arm_action 35 | #base_action = tf.concat([base_delta_pos, base_delta_ang], axis=0) 36 | #action['base_concat'] = base_action 37 | 38 | # Write the action format 39 | action['format']=tf.constant("eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 40 | 41 | # Convert raw state to our state 42 | state= step['observation'] 43 | eef_pos=state['state'][:3] 44 | eef_ang=state['state'][3:6] 45 | eef_ang = euler_to_rotation_matrix(eef_ang) 46 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 47 | gripper_action=state['state'][6:] 48 | 49 | # Concatenate the state 50 | state['arm_concat']=tf.concat([eef_pos,eef_ang,gripper_action],axis=0) 51 | 52 | # Write the state format 53 | state['format'] = tf.constant( 54 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 55 | 56 | # Clean the task instruction 57 | # Define the replacements (old, new) as a dictionary 58 | replacements = { 59 | '_': ' ', 60 | '1f': ' ', 61 | '4f': ' ', 62 | '-': ' ', 63 | '50': ' ', 64 | '55': ' ', 65 | '56': ' ', 66 | 67 | } 68 | instr = step['observation']['natural_language_instruction'] 69 | instr = clean_task_instruction(instr, replacements) 70 | step['observation']['natural_language_instruction'] = instr 71 | 72 | return step 73 | 74 | 75 | if __name__ == "__main__": 76 | import tensorflow_datasets as tfds 77 | from data.utils import dataset_to_path 78 | 79 | DATASET_DIR = 'data/datasets/openx_embod/' 80 | DATASET_NAME = 'bridge' 81 | # Load the dataset 82 | dataset = tfds.builder_from_directory( 83 | builder_dir=dataset_to_path( 84 | DATASET_NAME, DATASET_DIR)) 85 | dataset = dataset.as_dataset(split='all') 86 | 87 | # Inspect the dataset 88 | for episode in dataset: 89 | for step in episode['steps']: 90 | print(step) 91 | -------------------------------------------------------------------------------- /data/preprocess_scripts/cmu_franka_exploration_dataset_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler,euler_to_quaternion 4 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 5 | """ 6 | Convert terminate action to a boolean, where True means terminate. 7 | """ 8 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 9 | 10 | def process_step(step: dict) -> dict: 11 | """ 12 | Unify the action format and clean the task instruction. 13 | 14 | DO NOT use python list, use tf.TensorArray instead. 
15 | """ 16 | # Convert raw action to our action 17 | 18 | origin_action = step['action'] 19 | step['action']={} 20 | action=step['action'] 21 | action['terminate']=terminate_act_to_bool(origin_action[7]) 22 | 23 | # gripper_open: 1-open, 0-closed 24 | 25 | eef_pos=origin_action[:3] 26 | eef_ang=origin_action[3:6] 27 | eef_ang = euler_to_quaternion(eef_ang) 28 | grip_open=origin_action[6:7] 29 | # No base found 30 | 31 | # Concatenate the action 32 | action['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_open],axis=0) 33 | 34 | # Write the action format 35 | action['format'] = tf.constant( 36 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 37 | 38 | # No state found 39 | 40 | # Clean the task instruction 41 | # Define the replacements (old, new) as a dictionary 42 | replacements = { 43 | '_': ' ', 44 | '1f': ' ', 45 | '4f': ' ', 46 | '-': ' ', 47 | '50': ' ', 48 | '55': ' ', 49 | '56': ' ', 50 | 51 | } 52 | instr = step['language_instruction'] 53 | instr = clean_task_instruction(instr, replacements) 54 | step['observation']['natural_language_instruction'] = instr 55 | 56 | return step 57 | 58 | 59 | if __name__ == "__main__": 60 | import tensorflow_datasets as tfds 61 | from data.utils import dataset_to_path 62 | 63 | DATASET_DIR = 'data/datasets/openx_embod' 64 | DATASET_NAME = 'cmu_franka_exploration_dataset_converted_externally_to_rlds' 65 | # Load the dataset 66 | dataset = tfds.builder_from_directory( 67 | builder_dir=dataset_to_path( 68 | DATASET_NAME, DATASET_DIR)) 69 | dataset = dataset.as_dataset(split='all') 70 | 71 | # Inspect the dataset 72 | for episode in dataset: 73 | for step in episode['steps']: 74 | print(step['action'][6:7]) 75 | 76 | -------------------------------------------------------------------------------- /data/preprocess_scripts/cmu_play_fusion.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler 4 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 5 | """ 6 | Convert terminate action to a boolean, where True means terminate. 7 | """ 8 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 9 | 10 | def process_step(step: dict) -> dict: 11 | """ 12 | Unify the action format and clean the task instruction. 13 | 14 | DO NOT use python list, use tf.TensorArray instead. 
15 | """ 16 | # Convert raw action to our action 17 | 18 | origin_action = step['action'] 19 | step['action']={} 20 | action=step['action'] 21 | action['terminate']=terminate_act_to_bool(origin_action[8]) 22 | 23 | 24 | eef_pos=origin_action[:3] 25 | # eef_ang=quaternion_to_euler(origin_action[3:7]) 26 | eef_ang = origin_action[3:7] 27 | grip_open=origin_action[7:8] 28 | # No base found 29 | 30 | # Concatenate the action 31 | action['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_open],axis=0) 32 | 33 | # Write the action format 34 | action['format'] = tf.constant( 35 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 36 | 37 | # Convert raw state to our state 38 | state = step['observation'] 39 | # Concatenate the state 40 | arm_joint_ang=state['state'][:7] 41 | grip_open=state['state'][7:8] * 11.765 # rescale to [0, 1] 42 | state['arm_concat'] = tf.concat([arm_joint_ang,grip_open],axis=0) 43 | # Write the state format 44 | state['format'] = tf.constant( 45 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos") 46 | 47 | # Clean the task instruction 48 | # Define the replacements (old, new) as a dictionary 49 | replacements = { 50 | '_': ' ', 51 | '1f': ' ', 52 | '4f': ' ', 53 | '-': ' ', 54 | '50': ' ', 55 | '55': ' ', 56 | '56': ' ', 57 | 58 | } 59 | instr = step['language_instruction'] 60 | instr = clean_task_instruction(instr, replacements) 61 | step['observation']['natural_language_instruction'] = instr 62 | 63 | return step 64 | 65 | 66 | if __name__ == "__main__": 67 | import tensorflow_datasets as tfds 68 | from data.utils import dataset_to_path 69 | 70 | DATASET_DIR = 'data/datasets/openx_embod' 71 | DATASET_NAME = 'cmu_play_fusion' 72 | # Load the dataset 73 | dataset = tfds.builder_from_directory( 74 | builder_dir=dataset_to_path( 75 | DATASET_NAME, DATASET_DIR)) 76 | dataset = dataset.as_dataset(split='all') 77 | 78 | # Inspect the dataset 79 | for episode in dataset: 80 | for step in episode['steps']: 81 | print(step['action'][6:7]) 82 | 83 | -------------------------------------------------------------------------------- /data/preprocess_scripts/cmu_stretch.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler,euler_to_quaternion 4 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 5 | """ 6 | Convert terminate action to a boolean, where True means terminate. 7 | """ 8 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 9 | 10 | def process_step(step: dict) -> dict: 11 | """ 12 | Unify the action format and clean the task instruction. 13 | 14 | DO NOT use python list, use tf.TensorArray instead. 
15 | """ 16 | # Convert raw action to our action 17 | 18 | origin_action = step['action'] 19 | step['action']={} 20 | action=step['action'] 21 | action['terminate']=terminate_act_to_bool(origin_action[7]) 22 | 23 | 24 | eef_pos=origin_action[:3] 25 | eef_ang=origin_action[3:6] 26 | eef_ang = euler_to_quaternion(eef_ang) 27 | grip_open=origin_action[6:7] 28 | # No base found 29 | 30 | # Concatenate the action 31 | action['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_open],axis=0) 32 | 33 | # Write the action format 34 | action['format'] = tf.constant( 35 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 36 | 37 | # Convert raw state to our state 38 | state = step['observation'] 39 | # Concatenate the state 40 | eef_pos_x = state['state'][0:1] 41 | eef_pos_z = state['state'][2:3] 42 | grip_open = state['state'][3:4] 43 | state['arm_concat'] = tf.concat( 44 | [eef_pos_x, eef_pos_z, grip_open], axis=0) 45 | # Write the state format 46 | state['format'] = tf.constant( 47 | "eef_pos_x,eef_pos_z,gripper_open") 48 | 49 | # Clean the task instruction 50 | # Define the replacements (old, new) as a dictionary 51 | replacements = { 52 | '_': ' ', 53 | '1f': ' ', 54 | '4f': ' ', 55 | '-': ' ', 56 | '50': ' ', 57 | '55': ' ', 58 | '56': ' ', 59 | 60 | } 61 | instr = step['language_instruction'] 62 | instr = clean_task_instruction(instr, replacements) 63 | step['observation']['natural_language_instruction'] = instr 64 | 65 | return step 66 | 67 | 68 | if __name__ == "__main__": 69 | import tensorflow_datasets as tfds 70 | from data.utils import dataset_to_path 71 | 72 | DATASET_DIR = 'data/datasets/openx_embod' 73 | DATASET_NAME = 'cmu_stretch' 74 | # Load the dataset 75 | dataset = tfds.builder_from_directory( 76 | builder_dir=dataset_to_path( 77 | DATASET_NAME, DATASET_DIR)) 78 | dataset = dataset.as_dataset(split='all') 79 | 80 | # Inspect the dataset 81 | for episode in dataset: 82 | for step in episode['steps']: 83 | print(step['action'][6:7]) 84 | 85 | -------------------------------------------------------------------------------- /data/preprocess_scripts/columbia_cairlab_pusht_real.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 18 | """ 19 | # Convert raw action to our action 20 | action = step['action'] 21 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 22 | 23 | eef_delta_pos = action['world_vector'] 24 | eef_ang = action['rotation_delta'] 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | 27 | # Ignore action['gripper_open']: 1 if close gripper, -1 if open gripper, 0 if no change. 
28 | 29 | # No base found 30 | 31 | # Concatenate the action 32 | arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0) 33 | action['arm_concat'] = arm_action 34 | 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") 38 | 39 | # Convert raw state to our state 40 | state = step['observation'] 41 | eef_pos = state['robot_state'] 42 | 43 | # Concatenate the state 44 | state['arm_concat'] = eef_pos 45 | 46 | # Write the state format 47 | state['format'] = tf.constant( 48 | "eef_pos_x,eef_pos_y") 49 | 50 | # Clean the task instruction 51 | # Define the replacements (old, new) as a dictionary 52 | replacements = { 53 | '_': ' ', 54 | '1f': ' ', 55 | '4f': ' ', 56 | '-': ' ', 57 | '50': ' ', 58 | '55': ' ', 59 | '56': ' ', 60 | 61 | } 62 | instr = step['observation']['natural_language_instruction'] 63 | instr = clean_task_instruction(instr, replacements) 64 | step['observation']['natural_language_instruction'] = instr 65 | 66 | return step 67 | 68 | 69 | if __name__ == "__main__": 70 | import tensorflow_datasets as tfds 71 | from data.utils import dataset_to_path 72 | 73 | DATASET_DIR = 'data/datasets/openx_embod' 74 | DATASET_NAME = 'columbia_cairlab_pusht_real' 75 | # Load the dataset 76 | dataset = tfds.builder_from_directory( 77 | builder_dir=dataset_to_path( 78 | DATASET_NAME, DATASET_DIR)) 79 | dataset = dataset.as_dataset(split='all') 80 | 81 | # Inspect the dataset 82 | for episode in dataset: 83 | for step in episode['steps']: 84 | print(step) 85 | -------------------------------------------------------------------------------- /data/preprocess_scripts/dlr_edan_shared_control_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | # Robot action, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy")]. 23 | eef_delta_pos = action[:3] 24 | eef_ang = tf.gather(action[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | # Inspection of the episodes shows that the last action dimension is a gripper open/close command: 0 = open, 1 = close.
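The `tf.gather(..., [1, 2, 0])` just above rearranges the stored orientation components. The dataset card describes them in scipy `Rotation.as_euler("zxy")` output order, i.e. (z, x, y); the gather moves them into (x, y, z) slots before `euler_to_quaternion` (assumed to expect XYZ order) is applied. The index mapping in isolation:

```python
import tensorflow as tf

angles_zxy = tf.constant([0.3, 0.1, 0.2])      # stored as (z, x, y)
angles_xyz = tf.gather(angles_zxy, [1, 2, 0])  # reordered to (x, y, z)
print(angles_xyz.numpy())                      # [0.1 0.2 0.3]
```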
27 | grip_open = tf.expand_dims(1 - action[6], axis=0) 28 | 29 | # Concatenate the action 30 | step['action'] = {} 31 | action = step['action'] 32 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,grip_open], axis=0) 33 | action['terminate'] = step['is_terminal'] 34 | 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 38 | 39 | # Convert raw state to our state 40 | state = step['observation'] 41 | state_vec = state['state'] 42 | # Robot state, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy"), 6x robot EEF wrench]. 43 | eef_pos = state_vec[:3] 44 | eef_ang = tf.gather(state_vec[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") 45 | eef_ang = euler_to_rotation_matrix(eef_ang) 46 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 47 | grip_open = tf.expand_dims(1 - state_vec[6], axis=0) 48 | # Concatenate the state 49 | state['arm_concat'] = tf.concat([eef_pos, eef_ang,grip_open], axis=0) 50 | 51 | # Write the state format 52 | state['format'] = tf.constant( 53 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 54 | 55 | # Clean the task instruction 56 | # Define the replacements (old, new) as a dictionary 57 | replacements = { 58 | '_': ' ', 59 | '1f': ' ', 60 | '4f': ' ', 61 | '-': ' ', 62 | '50': ' ', 63 | '55': ' ', 64 | '56': ' ', 65 | 66 | } 67 | instr = step['language_instruction'] 68 | instr = clean_task_instruction(instr, replacements) 69 | step['observation']['natural_language_instruction'] = instr 70 | 71 | return step 72 | 73 | 74 | if __name__ == "__main__": 75 | import tensorflow_datasets as tfds 76 | from data.utils import dataset_to_path 77 | 78 | DATASET_DIR = 'data/datasets/openx_embod' 79 | DATASET_NAME = 'dlr_edan_shared_control_converted_externally_to_rlds' 80 | # Load the dataset 81 | dataset = tfds.builder_from_directory( 82 | builder_dir=dataset_to_path( 83 | DATASET_NAME, DATASET_DIR)) 84 | dataset = dataset.as_dataset(split='all') 85 | 86 | # Inspect the dataset 87 | for episode in dataset: 88 | for step in episode['steps']: 89 | print(step) 90 | -------------------------------------------------------------------------------- /data/preprocess_scripts/dlr_sara_grid_clamp_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | # Robot action, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy")].
23 | eef_delta_pos = action[:3] 24 | eef_ang = tf.gather(action[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | # Inspection of the episodes shows that the last action dimension is a gripper open/close command: 0 = open, 1 = close. 27 | grip_open = tf.expand_dims(1 - action[6], axis=0) 28 | 29 | # Concatenate the action 30 | step['action'] = {} 31 | action = step['action'] 32 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,grip_open], axis=0) 33 | action['terminate'] = step['is_terminal'] 34 | 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 38 | 39 | # Convert raw state to our state 40 | state = step['observation'] 41 | state_vec = state['state'] 42 | # Robot state, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy"), 6x robot EEF wrench]. 43 | eef_pos = state_vec[:3] 44 | eef_ang = tf.gather(state_vec[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") 45 | eef_ang = euler_to_rotation_matrix(eef_ang) 46 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 47 | # joint_pos = state_vec[6:12] # The EEF wrench is not the joint positions 48 | # Concatenate the state 49 | # state['arm_concat'] = tf.concat([joint_pos,eef_pos, eef_ang], axis=0) 50 | state['arm_concat'] = tf.concat([eef_pos, eef_ang], axis=0) 51 | 52 | # Write the state format 53 | state['format'] = tf.constant( 54 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 55 | 56 | # Define the task instruction 57 | step['observation']['natural_language_instruction'] = tf.constant( 58 | "Place the grid clamp on the table, aligning the protrusion on its bottom with the slots on the table.") 59 | 60 | return step 61 | 62 | 63 | if __name__ == "__main__": 64 | import tensorflow_datasets as tfds 65 | from data.utils import dataset_to_path 66 | 67 | DATASET_DIR = 'data/datasets/openx_embod' 68 | DATASET_NAME = 'dlr_sara_grid_clamp_converted_externally_to_rlds' 69 | # Load the dataset 70 | dataset = tfds.builder_from_directory( 71 | builder_dir=dataset_to_path( 72 | DATASET_NAME, DATASET_DIR)) 73 | dataset = dataset.as_dataset(split='all') 74 | 75 | # Inspect the dataset 76 | for episode in dataset: 77 | for step in episode['steps']: 78 | print(step) 79 | -------------------------------------------------------------------------------- /data/preprocess_scripts/dlr_sara_pour_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | # Robot action, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy")].
23 | eef_delta_pos = action[:3] 24 | eef_ang = tf.gather(action[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | # Inspection of the episodes shows that the last action dimension is a gripper open/close command: 0 = open, 1 = close. 27 | grip_open = tf.expand_dims(1 - action[6], axis=0) 28 | # Concatenate the action 29 | step['action'] = {} 30 | action = step['action'] 31 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,grip_open], axis=0) 32 | action['terminate'] = step['is_terminal'] 33 | 34 | # Write the action format 35 | action['format'] = tf.constant( 36 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 37 | 38 | # Convert raw state to our state 39 | state = step['observation'] 40 | state_vec = state['state'] 41 | # Robot state, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy")]. 42 | eef_pos = state_vec[:3] 43 | eef_ang = tf.gather(state_vec[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") 44 | eef_ang = euler_to_rotation_matrix(eef_ang) 45 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 46 | 47 | # Concatenate the state 48 | state['arm_concat'] = tf.concat([eef_pos, eef_ang], axis=0) 49 | 50 | # Write the state format 51 | state['format'] = tf.constant( 52 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 53 | 54 | # Clean the task instruction 55 | # Define the replacements (old, new) as a dictionary 56 | replacements = { 57 | '_': ' ', 58 | '1f': ' ', 59 | '4f': ' ', 60 | '-': ' ', 61 | '50': ' ', 62 | '55': ' ', 63 | '56': ' ', 64 | 65 | } 66 | instr = step['language_instruction'] 67 | instr = clean_task_instruction(instr, replacements) 68 | step['observation']['natural_language_instruction'] = instr 69 | 70 | return step 71 | 72 | 73 | if __name__ == "__main__": 74 | import tensorflow_datasets as tfds 75 | from data.utils import dataset_to_path 76 | 77 | DATASET_DIR = 'data/datasets/openx_embod' 78 | DATASET_NAME = 'dlr_sara_pour_converted_externally_to_rlds' 79 | # Load the dataset 80 | dataset = tfds.builder_from_directory( 81 | builder_dir=dataset_to_path( 82 | DATASET_NAME, DATASET_DIR)) 83 | dataset = dataset.as_dataset(split='all') 84 | 85 | # Inspect the dataset 86 | for episode in dataset: 87 | for step in episode['steps']: 88 | print(step) 89 | -------------------------------------------------------------------------------- /data/preprocess_scripts/dobbe.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d 4 | 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 11 | """ 12 | # Convert raw action to our action 13 | arm_action = step['action'] 14 | 15 | # Concatenate the action 16 | step['action'] = {} 17 | action = step['action'] 18 | action['arm_concat'] = arm_action 19 | # Write the action format 20 | action['format'] = tf.constant( 21 | "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_open") 22 | action['terminate'] = step['is_terminal'] 23 | 24 | # Convert raw state to our state
25 | state = step['observation'] 26 | eef_pos = state['xyz'] 27 | # Clip eef_pos to be [-10, 10] for stability 28 | eef_pos = tf.clip_by_value(eef_pos, -10, 10) 29 | eef_ang = state['rot'] 30 | eef_ang = euler_to_rotation_matrix(eef_ang) 31 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 32 | grip_pos = state['gripper'] 33 | 34 | # Concatenate the state 35 | state['arm_concat'] = tf.concat([ 36 | grip_pos,eef_pos,eef_ang], axis=0) 37 | 38 | # Write the state format 39 | state['format'] = tf.constant( 40 | "gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 41 | 42 | # Clean the task instruction 43 | # Define the replacements (old, new) as a dictionary 44 | replacements = { 45 | '_': ' ', 46 | '1f': ' ', 47 | '4f': ' ', 48 | '-': ' ', 49 | '50': ' ', 50 | '55': ' ', 51 | '56': ' ', 52 | } 53 | instr = step['language_instruction'] 54 | instr = clean_task_instruction(instr, replacements) 55 | step['observation']['natural_language_instruction'] = instr 56 | 57 | return step 58 | 59 | 60 | if __name__ == "__main__": 61 | import tensorflow_datasets as tfds 62 | from data.utils import dataset_to_path 63 | from tqdm import tqdm 64 | import numpy as np 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'dobbe' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | # dataset = dataset.filter( 74 | # lambda x: tf.math.less( 75 | # tf.math.reduce_max(tf.math.abs( 76 | # tf.convert_to_tensor( 77 | # [step['observation']['xyz'] for step in x['steps']]))), 3)) 78 | 79 | # Inspect the dataset 80 | for i, episode in tqdm(enumerate(dataset), total=5208): 81 | res = [] 82 | for step in episode['steps']: 83 | res.append(step['observation']['xyz'].numpy()) 84 | max_val = np.max(np.abs(res)) 85 | if max_val > 2: 86 | print(f"Episode {i} has a max value of {max_val}") 87 | -------------------------------------------------------------------------------- /data/preprocess_scripts/droid.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d 4 | 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
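dobbe.py above clips `xyz` to [-10, 10] for stability and its `__main__` block scans for outlier episodes, with a dataset-level filter left commented out. A sketch of what that filter could look like, assuming (as in the inspection loop) that each episode nests a `steps` sub-dataset with a float32 `observation`/`xyz` field:

```python
import tensorflow as tf

def episode_max_abs_xyz(episode: dict) -> tf.Tensor:
    # Reduce over the nested steps dataset to the episode's max |xyz| value.
    xyz = episode['steps'].map(lambda s: tf.reduce_max(tf.abs(s['observation']['xyz'])))
    return xyz.reduce(tf.constant(0.0), tf.maximum)

# dataset = dataset.filter(lambda ep: episode_max_abs_xyz(ep) < 3.0)
```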
11 | """ 12 | # Convert raw action to our action 13 | action_dict = step['action_dict'] 14 | 15 | # Robot action 16 | eef_pos = action_dict['cartesian_position'][:3] 17 | eef_ang = action_dict['cartesian_position'][3:6] 18 | eef_ang = euler_to_rotation_matrix(eef_ang) 19 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 20 | eef_pos_vel = action_dict['cartesian_velocity'][:3] 21 | eef_ang_vel = action_dict['cartesian_velocity'][3:6] 22 | joint_pos = action_dict['joint_position'] 23 | joint_vel = action_dict['joint_velocity'] 24 | grip_pos = action_dict['gripper_position'] 25 | grip_vel = action_dict['gripper_velocity'] 26 | 27 | # Concatenate the action 28 | step['action'] = {} 29 | action = step['action'] 30 | 31 | arm_action = tf.concat([eef_pos, eef_ang, eef_pos_vel, eef_ang_vel, joint_pos, joint_vel, grip_pos, grip_vel], axis=0) 32 | action['arm_concat'] = arm_action 33 | action['terminate'] = step['is_terminal'] 34 | 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,gripper_joint_0_vel") 38 | 39 | # Convert raw state to our state 40 | # Robot state 41 | state = step['observation'] 42 | eef_pos = state['cartesian_position'][:3] 43 | eef_ang = state['cartesian_position'][3:6] 44 | eef_ang = euler_to_rotation_matrix(eef_ang) 45 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 46 | joint_pos = state['joint_position'] 47 | grip_pos = 1 - state['gripper_position'] 48 | 49 | # Concatenate the state 50 | state['arm_concat'] = tf.concat([ 51 | joint_pos,grip_pos,eef_pos,eef_ang], axis=0) 52 | 53 | 54 | # Write the state format 55 | state['format'] = tf.constant( 56 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 57 | 58 | # Clean the task instruction 59 | # Define the replacements (old, new) as a dictionary 60 | replacements = { 61 | '_': ' ', 62 | '1f': ' ', 63 | '4f': ' ', 64 | '-': ' ', 65 | '50': ' ', 66 | '55': ' ', 67 | '56': ' ', 68 | 69 | } 70 | instr = step['language_instruction'] 71 | instr = clean_task_instruction(instr, replacements) 72 | step['observation']['natural_language_instruction'] = instr 73 | 74 | return step 75 | 76 | 77 | if __name__ == "__main__": 78 | pass 79 | -------------------------------------------------------------------------------- /data/preprocess_scripts/eth_agent_affordances.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d 4 | 5 | def process_step(step: dict) -> dict: 6 | """ 7 | Unify the action format and clean the task instruction. 8 | 9 | DO NOT use python list, use tf.TensorArray instead. 
10 | """ 11 | # Convert raw action to our action 12 | 13 | origin_action = step['action'] 14 | step['action']={} 15 | action=step['action'] 16 | action['terminate'] = step['is_terminal'] 17 | 18 | eef_vel = origin_action[:3] 19 | eef_ang_vel=origin_action[3:6] 20 | # No base found 21 | 22 | # Concatenate the action 23 | action['arm_concat'] = tf.concat([eef_vel,eef_ang_vel],axis=0) 24 | 25 | # Write the action format 26 | action['format'] = tf.constant( 27 | "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw") 28 | 29 | # Convert raw state to our state 30 | state = step['observation'] 31 | # Concatenate the state 32 | eef_pos = state['state'][:3] 33 | eef_ang = tf.gather(state['state'][3:6], [2, 1, 0]) 34 | eef_ang = euler_to_rotation_matrix(eef_ang) 35 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 36 | grip_open=state['state'][6:7] 37 | # state['state'][8]:door opening angle 38 | state['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_open],axis=0) 39 | 40 | # Write the state format 41 | state['format'] = tf.constant( 42 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 43 | # Clean the task instruction 44 | # Define the replacements (old, new) as a dictionary 45 | replacements = { 46 | '_': ' ', 47 | '1f': ' ', 48 | '4f': ' ', 49 | '-': ' ', 50 | '50': ' ', 51 | '55': ' ', 52 | '56': ' ', 53 | 54 | } 55 | instr = step['language_instruction'] 56 | instr = clean_task_instruction(instr, replacements) 57 | step['observation']['natural_language_instruction'] = instr 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'eth_agent_affordances' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset: 76 | for step in episode['steps']: 77 | print(step) 78 | -------------------------------------------------------------------------------- /data/preprocess_scripts/fmb.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d, \ 4 | quaternion_to_rotation_matrix 5 | 6 | 7 | def process_step(step: dict) -> dict: 8 | """ 9 | Unify the action format and clean the task instruction. 10 | 11 | DO NOT use python list, use tf.TensorArray instead. 
12 | """ 13 | # Convert raw action to our action 14 | action = step['action'] 15 | 16 | # Robot action 17 | eef_pos = action[:3] 18 | eef_ang = action[3:6] 19 | eef_ang = euler_to_rotation_matrix(eef_ang) 20 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 21 | gripper_open = action[6:7] 22 | 23 | # Concatenate the action 24 | step['action'] = {} 25 | action = step['action'] 26 | 27 | arm_action = tf.concat([eef_pos, eef_ang, gripper_open], axis=0) 28 | action['arm_concat'] = arm_action 29 | action['terminate'] = step['is_terminal'] 30 | 31 | # Write the action format 32 | action['format'] = tf.constant( 33 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 34 | 35 | # Convert raw state to our state 36 | # Robot state 37 | state = step['observation'] 38 | eef_pos = state['eef_pose'][:3] 39 | eef_ang = state['eef_pose'][3:] 40 | eef_ang = quaternion_to_rotation_matrix(eef_ang) 41 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 42 | eef_pos_vel = state['eef_vel'][:3] 43 | eef_ang_vel = state['eef_vel'][3:] 44 | joint_pos = state['joint_pos'] 45 | joint_vel = state['joint_vel'] 46 | grip_pos = 1 - state['state_gripper_pose'] 47 | grip_pos = tf.expand_dims(grip_pos, axis=0) 48 | 49 | # Concatenate the state 50 | state['arm_concat'] = tf.concat([ 51 | joint_pos,joint_vel,grip_pos,eef_pos,eef_ang,eef_pos_vel,eef_ang_vel], axis=0) 52 | 53 | # Write the state format 54 | state['format'] = tf.constant( 55 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw") 56 | 57 | # Clean the task instruction 58 | # Define the replacements (old, new) as a dictionary 59 | replacements = { 60 | '_': ' ', 61 | '1f': ' ', 62 | '4f': ' ', 63 | '-': ' ', 64 | '50': ' ', 65 | '55': ' ', 66 | '56': ' ', 67 | # Refine language instruction: 68 | 'object': 'brick and insert it into the slot of the matching shape' 69 | } 70 | instr = step['language_instruction'] 71 | instr = clean_task_instruction(instr, replacements) 72 | step['observation']['natural_language_instruction'] = instr 73 | 74 | return step 75 | 76 | 77 | if __name__ == "__main__": 78 | pass 79 | -------------------------------------------------------------------------------- /data/preprocess_scripts/fractal20220817_data.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, quaternion_to_rotation_matrix,\ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | 8 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 9 | """ 10 | Convert terminate action to a boolean, where True means terminate. 11 | """ 12 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 13 | 14 | 15 | def process_step(step: dict) -> dict: 16 | """ 17 | Unify the action format and clean the task instruction. 18 | 19 | DO NOT use python list, use tf.TensorArray instead. 
20 | """ 21 | # Convert raw action to our action 22 | action = step['action'] 23 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 24 | 25 | eef_delta_pos = action['world_vector'] 26 | eef_ang = action['rotation_delta'] 27 | eef_ang = euler_to_quaternion(eef_ang) 28 | grip_open = 1 - (action['gripper_closedness_action'] + 1) / 2 29 | # Multiplied by 3 Hz to get units m/s and rad/s 30 | base_delta_pos = action['base_displacement_vector'] * 3 31 | base_delta_ang = action['base_displacement_vertical_rotation'] * 3 32 | 33 | # Concatenate the action 34 | arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 35 | action['arm_concat'] = arm_action 36 | base_action = tf.concat([base_delta_pos, base_delta_ang], axis=0) 37 | action['base_concat'] = base_action 38 | 39 | # Write the action format 40 | action['format'] = tf.constant( 41 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open,base_vel_x,base_vel_y,base_angular_vel") 42 | 43 | # Convert raw state to our state 44 | state = step['observation'] 45 | eef_pos = state['base_pose_tool_reached'][:3] 46 | # eef_ang = quaternion_to_euler(state['base_pose_tool_reached'][3:]) 47 | eef_ang = quaternion_to_rotation_matrix(state['base_pose_tool_reached'][3:]) 48 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 49 | grip_open = 1 - state['gripper_closed'] 50 | 51 | # Concatenate the state 52 | state['arm_concat'] = tf.concat([eef_pos, eef_ang, grip_open], axis=0) 53 | 54 | # Write the state format 55 | state['format'] = tf.constant( 56 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 57 | 58 | # Clean the task instruction 59 | # Define the replacements (old, new) as a dictionary 60 | replacements = { 61 | '_': ' ', 62 | '1f': ' ', 63 | '4f': ' ', 64 | '-': ' ', 65 | '50': ' ', 66 | '55': ' ', 67 | '56': ' ', 68 | 69 | } 70 | instr = step['observation']['natural_language_instruction'] 71 | instr = clean_task_instruction(instr, replacements) 72 | step['observation']['natural_language_instruction'] = instr 73 | 74 | return step 75 | 76 | 77 | if __name__ == "__main__": 78 | import tensorflow_datasets as tfds 79 | from data.utils import dataset_to_path 80 | 81 | DATASET_DIR = 'data/datasets/openx_embod' 82 | DATASET_NAME = 'fractal20220817_data' 83 | # Load the dataset 84 | dataset = tfds.builder_from_directory( 85 | builder_dir=dataset_to_path( 86 | DATASET_NAME, DATASET_DIR)) 87 | dataset = dataset.as_dataset(split='all') 88 | 89 | # Inspect the dataset 90 | for episode in dataset: 91 | for step in episode['steps']: 92 | print(step) 93 | -------------------------------------------------------------------------------- /data/preprocess_scripts/iamlab_cmu_pickup_insert_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction,quaternion_to_euler 4 | 5 | def process_step(step: dict) -> dict: 6 | """ 7 | Unify the action format and clean the task instruction. 8 | 9 | DO NOT use python list, use tf.TensorArray instead. 
10 | """ 11 | # Convert raw action to our action 12 | 13 | origin_action = step['action'] 14 | step['action']={} 15 | action=step['action'] 16 | 17 | eef_delta_pos = origin_action[:3] 18 | # delta ZYX euler angles 19 | # eef_ang=quaternion_to_euler(origin_action[3:7]) 20 | eef_ang = origin_action[3:7] 21 | grip_open=origin_action[7:8] 22 | 23 | # No base found 24 | 25 | # Concatenate the action 26 | action['arm_concat'] = tf.concat([eef_delta_pos,eef_ang,grip_open],axis=0) 27 | 28 | # Write the action format 29 | action['format'] = tf.constant( 30 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 31 | 32 | # Convert raw state to our state 33 | state = step['observation'] 34 | # Concatenate the state 35 | # 7x robot joint angles, 1x gripper status, 6x joint torques, 6x end-effector force 36 | arm_joint_ang=state['state'][:7] 37 | 38 | grip_open=state['state'][7:8] 39 | 40 | state['arm_concat'] = tf.concat([arm_joint_ang,grip_open],axis=0) 41 | 42 | # Write the state format 43 | state['format'] = tf.constant( 44 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open") 45 | 46 | # Clean the task instruction 47 | # Define the replacements (old, new) as a dictionary 48 | replacements = { 49 | '_': ' ', 50 | '1f': ' ', 51 | '4f': ' ', 52 | '-': ' ', 53 | '50': ' ', 54 | '55': ' ', 55 | '56': ' ', 56 | 57 | } 58 | instr = step['language_instruction'] 59 | instr = clean_task_instruction(instr, replacements) 60 | step['observation']['natural_language_instruction'] = instr 61 | 62 | return step 63 | 64 | 65 | if __name__ == "__main__": 66 | import tensorflow_datasets as tfds 67 | from data.utils import dataset_to_path 68 | 69 | DATASET_DIR = 'data/datasets/openx_embod' 70 | DATASET_NAME = 'iamlab_cmu_pickup_insert_converted_externally_to_rlds' 71 | # Load the dataset 72 | dataset = tfds.builder_from_directory( 73 | builder_dir=dataset_to_path( 74 | DATASET_NAME, DATASET_DIR)) 75 | dataset = dataset.as_dataset(split='all') 76 | 77 | # Inspect the dataset 78 | for episode in dataset: 79 | for step in episode['steps']: 80 | print(step) 81 | -------------------------------------------------------------------------------- /data/preprocess_scripts/imperialcollege_sawyer_wrist_cam.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction 4 | 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
11 | """ 12 | # Convert raw action to our action 13 | 14 | origin_action = step['action'] 15 | step['action'] = {} 16 | action = step['action'] 17 | 18 | # Multiplied by 10 Hz to get units m/s and rad/s 19 | eef_delta_pos = origin_action[:3] * 10 20 | # delta ZYX euler angles == roll/pitch/yaw velocities 21 | eef_ang = origin_action[3:6] * 10 22 | grip_open = 1 - origin_action[6:7] 23 | 24 | # No base found 25 | 26 | # Concatenate the action 27 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open],axis=0) 28 | 29 | # Write the action format 30 | action['format'] = tf.constant( 31 | "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_open") 32 | 33 | # Convert raw state to our state 34 | # state = step['observation'] 35 | # # Concatenate the state 36 | # grip_open=state['state'] 37 | # state['arm_concat'] = grip_open 38 | 39 | # Write the state format 40 | # state['format'] = tf.constant( 41 | # "gripper_open") 42 | 43 | # Clean the task instruction 44 | # Define the replacements (old, new) as a dictionary 45 | replacements = { 46 | '_': ' ', 47 | '1f': ' ', 48 | '4f': ' ', 49 | '-': ' ', 50 | '50': ' ', 51 | '55': ' ', 52 | '56': ' ', 53 | 54 | } 55 | instr = step['language_instruction'] 56 | instr = clean_task_instruction(instr, replacements) 57 | step['observation']['natural_language_instruction'] = instr 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'imperialcollege_sawyer_wrist_cam' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset: 76 | for step in episode['steps']: 77 | print(step) 78 | -------------------------------------------------------------------------------- /data/preprocess_scripts/jaco_play.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 
19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 23 | eef_delta_pos = action['world_vector'] 24 | # eef_ang = action['rotation_delta'] 25 | # (NOTE) due to the formality problem, grip_open is not used 26 | # grip_open = 1 - (action['gripper_closedness_action'] ) / 2 27 | # base_delta_pos = action['base_displacement_vector'] 28 | # base_delta_ang = action['base_displacement_vertical_rotation'] 29 | 30 | # Concatenate the action 31 | arm_action = eef_delta_pos 32 | action['arm_concat'] = arm_action 33 | # base_action = tf.constant([0, 0, 0, 0], dtype=tf.float32) 34 | # action['base_concat'] = None 35 | 36 | # Write the action format 37 | action['format'] = tf.constant( 38 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z") 39 | 40 | # Convert raw state to our state 41 | state = step['observation'] 42 | joint_pos = state['joint_pos'] 43 | eef_pos = state['end_effector_cartesian_pos'][:3] 44 | eef_quat = state['end_effector_cartesian_pos'][3:] 45 | eef_ang = quaternion_to_rotation_matrix(eef_quat) 46 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 47 | eef_vel = state['end_effector_cartesian_velocity'][:3] 48 | # We do not use angular velocity since it is very inaccurate in this environment 49 | # eef_angular_vel = state['end_effector_cartesian_velocity'][3:] 50 | # Concatenate the state 51 | state['arm_concat'] = tf.concat([joint_pos, eef_pos, eef_ang, eef_vel], axis=0) 52 | 53 | # Write the state format 54 | state['format'] = tf.constant( 55 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_joint_0_pos,gripper_joint_1_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z") 56 | 57 | # Clean the task instruction 58 | # Define the replacements (old, new) as a dictionary 59 | replacements = { 60 | '_': ' ', 61 | '1f': ' ', 62 | '4f': ' ', 63 | '-': ' ', 64 | '50': ' ', 65 | '55': ' ', 66 | '56': ' ', 67 | 68 | } 69 | instr = step['observation']['natural_language_instruction'] 70 | instr = clean_task_instruction(instr, replacements) 71 | step['observation']['natural_language_instruction'] = instr 72 | 73 | return step 74 | 75 | 76 | if __name__ == "__main__": 77 | import tensorflow_datasets as tfds 78 | from data.utils import dataset_to_path 79 | 80 | DATASET_DIR = 'data/datasets/openx_embod' 81 | DATASET_NAME = 'jaco_play' 82 | # Load the dataset 83 | dataset = tfds.builder_from_directory( 84 | builder_dir=dataset_to_path( 85 | DATASET_NAME, DATASET_DIR)) 86 | dataset = dataset.as_dataset(split='all') 87 | 88 | # Inspect the dataset 89 | for episode in dataset: 90 | for step in episode['steps']: 91 | print(step) 92 | -------------------------------------------------------------------------------- /data/preprocess_scripts/language_table.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction 4 | 5 | def process_step(step: dict) -> dict: 6 | """ 7 | Unify the action format and clean the task instruction. 8 | 9 | DO NOT use python list, use tf.TensorArray instead. 
10 | """ 11 | # Convert raw action to our action 12 | 13 | origin_action = step['action'] 14 | step['action']={} 15 | action=step['action'] 16 | 17 | eef_delta_pos=origin_action 18 | # No base found 19 | 20 | # Concatenate the action 21 | action['arm_concat'] = eef_delta_pos 22 | action['terminate'] = step['is_terminal'] 23 | 24 | # Write the action format 25 | action['format'] = tf.constant( 26 | "eef_delta_pos_x,eef_delta_pos_y") 27 | 28 | # Convert raw state to our state 29 | state = step['observation'] 30 | # Concatenate the state 31 | eef_pos=state['effector_translation'] 32 | state['arm_concat'] = eef_pos 33 | # Write the state format 34 | state['format'] = tf.constant( 35 | "eef_pos_x,eef_pos_y") 36 | 37 | # Clean the task instruction 38 | # Define the replacements (old, new) as a dictionary 39 | replacements = { 40 | '_': ' ', 41 | '1f': ' ', 42 | '4f': ' ', 43 | '-': ' ', 44 | '50': ' ', 45 | '55': ' ', 46 | '56': ' ', 47 | 48 | } 49 | instr = step['observation']['instruction'] 50 | # Convert bytes to tf.string 51 | instr = tf.strings.unicode_encode(instr, 'UTF-8') 52 | # Remove '\x00' 53 | instr = tf.strings.regex_replace(instr, '\x00', '') 54 | instr = clean_task_instruction(instr, replacements) 55 | step['observation']['natural_language_instruction'] = instr 56 | return step 57 | 58 | 59 | if __name__ == "__main__": 60 | import tensorflow_datasets as tfds 61 | from data.utils import dataset_to_path 62 | 63 | DATASET_DIR = 'data/datasets/openx_embod' 64 | DATASET_NAME = 'language_table' 65 | # Load the dataset 66 | dataset = tfds.builder_from_directory( 67 | builder_dir=dataset_to_path( 68 | DATASET_NAME, DATASET_DIR)) 69 | dataset = dataset.as_dataset(split='all') 70 | 71 | # Inspect the dataset 72 | for episode in dataset: 73 | for step in episode['steps']: 74 | print(step) 75 | 76 | -------------------------------------------------------------------------------- /data/preprocess_scripts/nyu_door_opening_surprising_effectiveness.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.equal(terminate_act, tf.constant(1.0, dtype=tf.float32)) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 
18 | """ 19 | # Convert raw action to our action 20 | action = step['action'] 21 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 22 | 23 | # Multiplied by 3 Hz to get units m/s and rad/s 24 | eef_delta_pos = action['world_vector'] * 3 25 | eef_ang = action['rotation_delta'] * 3 26 | # Origin: [-0.28, 0.96]: open, close 27 | # 1-Origin: [0.04, 1.28]: close, open 28 | grip_open = 1 - action['gripper_closedness_action'] 29 | # base_delta_pos = action['base_displacement_vector'] 30 | # base_delta_ang = action['base_displacement_vertical_rotation'] 31 | 32 | # Concatenate the action 33 | arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 34 | action['arm_concat'] = arm_action 35 | # base_action = tf.concat([base_delta_pos, base_delta_ang], axis=0) 36 | # action['base_concat'] = base_action 37 | 38 | # Write the action format 39 | action['format'] = tf.constant( 40 | "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_open") 41 | 42 | # Convert raw state to our state 43 | # state = step['observation'] 44 | # eef_pos = state['base_pose_tool_reached'][:3] 45 | # eef_ang = quaternion_to_euler(state['base_pose_tool_reached'][3:]) 46 | # grip_open = 1 - state['gripper_closed'] 47 | 48 | # create empty tensor 49 | # state['arm_concat'] = tf.constant([0, 0, 0, 0, 0, 0], dtype=tf.float32) 50 | 51 | # Write the state format 52 | # state['format'] = tf.constant( 53 | # "") 54 | 55 | # Define the task instruction 56 | step['observation']['natural_language_instruction'] = tf.constant( 57 | "Open the cabinet door.") 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'nyu_door_opening_surprising_effectiveness' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset.take(1): 76 | for step in episode['steps']: 77 | print(step) 78 | -------------------------------------------------------------------------------- /data/preprocess_scripts/nyu_franka_play_dataset_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 11 | 12 | def process_step(step: dict) -> dict: 13 | """ 14 | Unify the action format and clean the task instruction. 15 | 16 | DO NOT use python list, use tf.TensorArray instead. 
17 | """ 18 | # Convert raw action to our action 19 | 20 | origin_action = step['action'] 21 | step['action']={} 22 | action=step['action'] 23 | action['terminate']=terminate_act_to_bool(origin_action[14]) 24 | joint_vel=origin_action[:7] 25 | # gripper_open: 1-open, -1-closed 26 | 27 | grip_open=tf.where(tf.equal(origin_action[13:14],tf.constant(1.0)),tf.constant(1.0),tf.constant(0.0)) 28 | eef_pos=origin_action[7:10] 29 | eef_ang=origin_action[10:13] 30 | eef_ang = euler_to_quaternion(eef_ang) 31 | # No base found 32 | 33 | # Concatenate the action 34 | action['arm_concat'] = tf.concat([joint_vel,grip_open,eef_pos,eef_ang],axis=0) 35 | 36 | # Write the action format 37 | action['format'] = tf.constant( 38 | "arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") 39 | 40 | # Convert raw state to our state 41 | state = step['observation'] 42 | # Concatenate the state 43 | state_arm_joint = state['state'][:7] 44 | eef_pos = state['state'][7:10] 45 | eef_ang = euler_to_rotation_matrix(state['state'][10:13]) 46 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 47 | state['arm_concat'] = tf.concat([state_arm_joint,eef_pos,eef_ang],axis=0) 48 | 49 | # Write the state format 50 | state['format'] = tf.constant( 51 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 52 | 53 | # Clean the task instruction 54 | # Define the replacements (old, new) as a dictionary 55 | replacements = { 56 | '_': ' ', 57 | '1f': ' ', 58 | '4f': ' ', 59 | '-': ' ', 60 | '50': ' ', 61 | '55': ' ', 62 | '56': ' ', 63 | 64 | } 65 | instr = step['language_instruction'] 66 | instr = clean_task_instruction(instr, replacements) 67 | step['observation']['natural_language_instruction'] = instr 68 | 69 | return step 70 | 71 | 72 | if __name__ == "__main__": 73 | import tensorflow_datasets as tfds 74 | from data.utils import dataset_to_path 75 | 76 | DATASET_DIR = 'data/datasets/openx_embod' 77 | DATASET_NAME = 'nyu_franka_play_dataset_converted_externally_to_rlds' 78 | # Load the dataset 79 | dataset = tfds.builder_from_directory( 80 | builder_dir=dataset_to_path( 81 | DATASET_NAME, DATASET_DIR)) 82 | dataset = dataset.as_dataset(split='all') 83 | 84 | # Inspect the dataset 85 | for episode in dataset: 86 | for step in episode['steps']: 87 | print(step) 88 | -------------------------------------------------------------------------------- /data/preprocess_scripts/nyu_rot_dataset_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
11 | """ 12 | # Convert raw action to our action 13 | 14 | origin_action = step['action'] 15 | step['action']={} 16 | action=step['action'] 17 | action['terminate'] = step['is_terminal'] 18 | 19 | eef_delta_pos = origin_action[:3] 20 | eef_ang=origin_action[3:6] 21 | eef_ang = euler_to_quaternion(eef_ang) 22 | # gripper_open: 0-open, 1-closed 23 | grip_open=tf.where(tf.equal(origin_action[6:],tf.constant(0.0)),tf.constant(1.0),tf.constant(0.0)) 24 | 25 | # No base found 26 | 27 | # Concatenate the action 28 | action['arm_concat'] = tf.concat([eef_delta_pos,eef_ang,grip_open],axis=0) 29 | 30 | # Write the action format 31 | action['format'] = tf.constant( 32 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 33 | 34 | # Convert raw state to our state 35 | state = step['observation'] 36 | eef_pos=state['state'][:3] 37 | eef_ang=state['state'][3:6] 38 | eef_ang = euler_to_rotation_matrix(eef_ang) 39 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 40 | grip_open=1-state['state'][6:7] 41 | # Concatenate the state 42 | state['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_open],axis=0) 43 | 44 | # Write the state format 45 | state['format'] = tf.constant( 46 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 47 | 48 | # Clean the task instruction 49 | # Define the replacements (old, new) as a dictionary 50 | replacements = { 51 | '_': ' ', 52 | '1f': ' ', 53 | '4f': ' ', 54 | '-': ' ', 55 | '50': ' ', 56 | '55': ' ', 57 | '56': ' ', 58 | 59 | } 60 | instr = step['language_instruction'] 61 | instr = clean_task_instruction(instr, replacements) 62 | step['observation']['natural_language_instruction'] = instr 63 | 64 | return step 65 | 66 | 67 | if __name__ == "__main__": 68 | import tensorflow_datasets as tfds 69 | from data.utils import dataset_to_path 70 | 71 | DATASET_DIR = 'data/datasets/openx_embod' 72 | DATASET_NAME = 'nyu_rot_dataset_converted_externally_to_rlds' 73 | # Load the dataset 74 | dataset = tfds.builder_from_directory( 75 | builder_dir=dataset_to_path( 76 | DATASET_NAME, DATASET_DIR)) 77 | dataset = dataset.as_dataset(split='all') 78 | 79 | # Inspect the dataset 80 | for episode in dataset: 81 | for step in episode['steps']: 82 | print(step) 83 | -------------------------------------------------------------------------------- /data/preprocess_scripts/qut_dexterous_manpulation.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d 4 | 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
11 | """ 12 | # Convert raw action to our action 13 | action = step['action'] 14 | eef_pos = action[:3] 15 | eef_quat = action[3:7] 16 | eef_ang = quaternion_to_rotation_matrix(eef_quat) 17 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 18 | grip_pos = action[7:] 19 | 20 | # Concatenate the action 21 | step['action'] = {} 22 | action = step['action'] 23 | action['arm_concat'] = tf.concat([ 24 | grip_pos,eef_pos,eef_ang], axis=0) 25 | # Write the action format 26 | action['format'] = tf.constant( 27 | "gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 28 | action['terminate'] = step['is_terminal'] 29 | 30 | # Convert raw state to our state 31 | state = step['observation'] 32 | robot_state = state['state'] 33 | eef_pos = robot_state[:3] 34 | eef_quat = robot_state[3:7] 35 | eef_ang = quaternion_to_rotation_matrix(eef_quat) 36 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 37 | joint_pos = robot_state[7:14] 38 | grip_pos = robot_state[14:16] * 24.707 # rescale to [0, 1] 39 | joint_vel = robot_state[16:23] 40 | 41 | # Concatenate the state 42 | state['arm_concat'] = tf.concat([ 43 | grip_pos,eef_pos,eef_ang,joint_pos,joint_vel], axis=0) 44 | 45 | # Write the state format 46 | state['format'] = tf.constant( 47 | "gripper_joint_0_pos,gripper_joint_1_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel") 48 | 49 | # Clean the task instruction 50 | # Define the replacements (old, new) as a dictionary 51 | replacements = { 52 | '_': ' ', 53 | '1f': ' ', 54 | '4f': ' ', 55 | '-': ' ', 56 | '50': ' ', 57 | '55': ' ', 58 | '56': ' ', 59 | } 60 | instr = step['language_instruction'] 61 | instr = clean_task_instruction(instr, replacements) 62 | step['observation']['natural_language_instruction'] = instr 63 | 64 | return step 65 | 66 | 67 | if __name__ == "__main__": 68 | pass 69 | -------------------------------------------------------------------------------- /data/preprocess_scripts/robo_net.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 5 | rotation_matrix_to_ortho6d 6 | 7 | 8 | def process_step(step: dict) -> dict: 9 | """ 10 | Unify the action format and clean the task instruction. 11 | 12 | DO NOT use python list, use tf.TensorArray instead. 
13 | """ 14 | # Convert raw action to our action 15 | action = step['action'] 16 | eef_delta_pos = action[:3] 17 | eef_delta_angle_yaw = action[3:4] 18 | eef_ang = tf.stack([0.0, 0.0, eef_delta_angle_yaw[0]], axis=0) 19 | eef_ang = euler_to_quaternion(eef_ang) 20 | eef_gripper_open = (1 - action[4:5]) / 2 21 | 22 | step['action'] = {} 23 | action = step['action'] 24 | action['terminate'] = step['is_terminal'] 25 | 26 | # No base found 27 | 28 | # Concatenate the action 29 | arm_action = tf.concat([eef_delta_pos, eef_ang, eef_gripper_open], axis=0) 30 | action['arm_concat'] = arm_action 31 | 32 | # Write the action format 33 | action['format'] = tf.constant( 34 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 35 | 36 | # Convert raw state to our state 37 | state = step['observation'] 38 | eef_pos = state['state'][:3] 39 | eef_ang_yaw = state['state'][3:4] 40 | eef_ang = tf.stack([0.0, 0.0, eef_ang_yaw[0]], axis=0) 41 | eef_ang = euler_to_rotation_matrix(eef_ang) 42 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 43 | grip_joint_pos = state['state'][4:5] 44 | # If abs(grip_joint_pos) > 3.15, then convert it to the radian 45 | grip_joint_pos = tf.cond(tf.greater(tf.abs(grip_joint_pos), 3.15), 46 | lambda: grip_joint_pos / 180 * np.pi, 47 | lambda: grip_joint_pos) 48 | # Concatenate the state 49 | state['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_joint_pos],axis=0) 50 | 51 | # Write the state format 52 | state['format'] = tf.constant( 53 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 54 | 55 | # Clean the task instruction 56 | # Define the replacements (old, new) as a dictionary 57 | replacements = { 58 | '_': ' ', 59 | '1f': ' ', 60 | '4f': ' ', 61 | '-': ' ', 62 | '50': ' ', 63 | '55': ' ', 64 | '56': ' ', 65 | 66 | } 67 | instr = step['language_instruction'] 68 | instr = clean_task_instruction(instr, replacements) 69 | step['observation']['natural_language_instruction'] = instr 70 | 71 | return step 72 | -------------------------------------------------------------------------------- /data/preprocess_scripts/robomimic_can_ph.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_datasets as tfds 3 | from data.utils import clean_task_instruction, quaternion_to_euler 4 | 5 | def load_dataset(): 6 | builder = tfds.builder('robomimic_ph/can_ph_image') 7 | builder.download_and_prepare() 8 | ds = builder.as_dataset(split='train', shuffle_files=True) 9 | return ds 10 | 11 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 12 | """ 13 | Convert terminate action to a boolean, where True means terminate. 14 | """ 15 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 16 | 17 | def process_step(step: dict) -> dict: 18 | """ 19 | Unify the action format and clean the task instruction. 20 | 21 | DO NOT use python list, use tf.TensorArray instead. 
22 | """ 23 | # format refers to https://www.tensorflow.org/datasets/catalog/robomimic_mg 24 | # Convert raw action to our action 25 | eef = step['action'] 26 | step['action'] = {} 27 | action = step['action'] 28 | action['terminate'] = step['is_terminal'] 29 | 30 | eef_delta_pos = eef[:3] 31 | eef_ang = eef[3:6] 32 | # No base found 33 | 34 | # Concatenate the action 35 | arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0) 36 | action['arm_concat'] = arm_action 37 | 38 | # Write the action format 39 | action['format'] = tf.constant( 40 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_roll,eef_delta_angle_pitch,eef_delta_angle_yaw") 41 | 42 | # Convert raw state to our state 43 | state = step['observation'] 44 | 45 | arm_joint_pos = state['robot0_joint_pos'] 46 | arm_joint_vel = state['robot0_joint_vel'] 47 | gripper_pos = state['robot0_gripper_qpos'] 48 | gripper_vel = state['robot0_gripper_qvel'] 49 | eef_pos = state['robot0_eef_pos'] 50 | eef_ang = quaternion_to_euler(state['robot0_eef_quat']) 51 | 52 | state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, gripper_pos,gripper_vel,eef_pos,eef_ang], axis=0) 53 | # convert to tf32 54 | state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32) 55 | # Write the state format 56 | state['format'] = tf.constant( 57 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,gripper_joint_1_pos,gripper_joint_0_vel,gripper_joint_1_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_roll,eef_angle_pitch,eef_angle_yaw") 58 | 59 | # Clean the task instruction 60 | # Define the replacements (old, new) as a dictionary 61 | replacements = { 62 | '_': ' ', 63 | '1f': ' ', 64 | '4f': ' ', 65 | '-': ' ', 66 | '50': ' ', 67 | '55': ' ', 68 | '56': ' ', 69 | 70 | } 71 | # manual added by lbg 72 | instr = "lift the can into the the box" 73 | instr = clean_task_instruction(instr, replacements) 74 | step['observation']['natural_language_instruction'] = instr 75 | 76 | return step 77 | 78 | 79 | if __name__ == "__main__": 80 | import tensorflow_datasets as tfds 81 | from data.utils import dataset_to_path 82 | 83 | DATASET_DIR = 'data/datasets/openx_embod' 84 | DATASET_NAME = 'roboturk' 85 | # Load the dataset 86 | dataset = tfds.builder_from_directory( 87 | builder_dir=dataset_to_path( 88 | DATASET_NAME, DATASET_DIR)) 89 | dataset = dataset.as_dataset(split='all').take(1) 90 | 91 | # Inspect the dataset 92 | ze=tf.constant(0.0) 93 | for episode in dataset: 94 | for step in episode['steps']: 95 | print(step) 96 | break 97 | -------------------------------------------------------------------------------- /data/preprocess_scripts/robomimic_lift_ph.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_datasets as tfds 3 | from data.utils import clean_task_instruction, quaternion_to_euler 4 | 5 | 6 | def load_dataset(): 7 | builder = tfds.builder('robomimic_ph/lift_ph_image') 8 | builder.download_and_prepare() 9 | ds = builder.as_dataset(split='train', shuffle_files=True) 10 | return ds 11 | 12 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 13 | """ 14 | Convert terminate action to a boolean, where True means terminate. 
15 | """ 16 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 17 | 18 | def process_step(step: dict) -> dict: 19 | """ 20 | Unify the action format and clean the task instruction. 21 | 22 | DO NOT use python list, use tf.TensorArray instead. 23 | """ 24 | # format refers to https://www.tensorflow.org/datasets/catalog/robomimic_mg 25 | # Convert raw action to our action 26 | eef = step['action'] 27 | step['action'] = {} 28 | action = step['action'] 29 | action['terminate'] = step['is_terminal'] 30 | 31 | eef_delta_pos = eef[:3] 32 | eef_ang = quaternion_to_euler(eef[3:]) 33 | 34 | # No base found 35 | 36 | # Concatenate the action 37 | arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0) 38 | action['arm_concat'] = arm_action 39 | 40 | # Write the action format 41 | action['format'] = tf.constant( 42 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_roll,eef_delta_angle_pitch,eef_delta_angle_yaw") 43 | 44 | # Convert raw state to our state 45 | state = step['observation'] 46 | arm_joint_pos = state['robot0_joint_pos'] 47 | arm_joint_vel = state['robot0_joint_vel'] 48 | gripper_pos = state['robot0_gripper_qpos'] 49 | gripper_vel = state['robot0_gripper_qvel'] 50 | eef_pos = state['robot0_eef_pos'] 51 | eef_ang = quaternion_to_euler(state['robot0_eef_quat']) 52 | 53 | state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, gripper_pos,gripper_vel,eef_pos,eef_ang], axis=0) 54 | # convert to tf32 55 | state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32) 56 | # Write the state format 57 | state['format'] = tf.constant( 58 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,gripper_joint_1_pos,gripper_joint_0_vel,gripper_joint_1_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_roll,eef_angle_pitch,eef_angle_yaw") 59 | 60 | # Clean the task instruction 61 | # Define the replacements (old, new) as a dictionary 62 | replacements = { 63 | '_': ' ', 64 | '1f': ' ', 65 | '4f': ' ', 66 | '-': ' ', 67 | '50': ' ', 68 | '55': ' ', 69 | '56': ' ', 70 | 71 | } 72 | # manual added by lbg 73 | instr = "lift the object on the table" 74 | instr = clean_task_instruction(instr, replacements) 75 | step['observation']['natural_language_instruction'] = instr 76 | 77 | return step 78 | 79 | 80 | if __name__ == "__main__": 81 | import tensorflow_datasets as tfds 82 | from data.utils import dataset_to_path 83 | 84 | DATASET_DIR = 'data/datasets/openx_embod' 85 | DATASET_NAME = 'roboturk' 86 | # Load the dataset 87 | dataset = tfds.builder_from_directory( 88 | builder_dir=dataset_to_path( 89 | DATASET_NAME, DATASET_DIR)) 90 | dataset = dataset.as_dataset(split='all').take(1) 91 | 92 | # Inspect the dataset 93 | ze=tf.constant(0.0) 94 | for episode in dataset: 95 | for step in episode['steps']: 96 | print(step) 97 | break 98 | -------------------------------------------------------------------------------- /data/preprocess_scripts/robomimic_square_ph.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_datasets as tfds 3 | from data.utils import clean_task_instruction, quaternion_to_euler 4 | 5 | 6 | def load_dataset(): 7 | builder = tfds.builder('robomimic_ph/square_ph_image') 8 | builder.download_and_prepare() 9 | ds = 
10 |     return ds
11 | 
12 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
13 |     """
14 |     Convert terminate action to a boolean, where True means terminate.
15 |     """
16 |     return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)), tf.constant(False), tf.constant(True))
17 | 
18 | def process_step(step: dict) -> dict:
19 |     """
20 |     Unify the action format and clean the task instruction.
21 | 
22 |     DO NOT use python list, use tf.TensorArray instead.
23 |     """
24 |     # format refers to https://www.tensorflow.org/datasets/catalog/robomimic_mg
25 |     # Convert raw action to our action
26 |     eef = step['action']
27 |     step['action'] = {}
28 |     action = step['action']
29 |     action['terminate'] = step['is_terminal']
30 | 
31 |     eef_delta_pos = eef[:3]
32 |     eef_ang = eef[3:6]  # 3x rotation delta; the trailing gripper channel is not used
33 | 
34 |     # No base found
35 | 
36 |     # Concatenate the action
37 |     arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0)
38 |     action['arm_concat'] = arm_action
39 | 
40 |     # Write the action format
41 |     action['format'] = tf.constant(
42 |         "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_roll,eef_delta_angle_pitch,eef_delta_angle_yaw")
43 | 
44 |     # Convert raw state to our state
45 |     state = step['observation']
46 |     arm_joint_pos = state['robot0_joint_pos']
47 |     arm_joint_vel = state['robot0_joint_vel']
48 |     gripper_pos = state['robot0_gripper_qpos']
49 |     gripper_vel = state['robot0_gripper_qvel']
50 |     eef_pos = state['robot0_eef_pos']
51 |     eef_ang = quaternion_to_euler(state['robot0_eef_quat'])
52 | 
53 |     state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, gripper_pos, gripper_vel, eef_pos, eef_ang], axis=0)
54 |     # Cast the concatenated state to float32
55 |     state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32)
56 |     # Write the state format
57 |     state['format'] = tf.constant(
58 |         "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,gripper_joint_1_pos,gripper_joint_0_vel,gripper_joint_1_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_roll,eef_angle_pitch,eef_angle_yaw")
59 | 
60 |     # Clean the task instruction
61 |     # Define the replacements (old, new) as a dictionary
62 |     replacements = {
63 |         '_': ' ',
64 |         '1f': ' ',
65 |         '4f': ' ',
66 |         '-': ' ',
67 |         '50': ' ',
68 |         '55': ' ',
69 |         '56': ' ',
70 | 
71 |     }
72 |     # Instruction manually added by lbg
73 |     instr = "move the square across the cube"
74 |     instr = clean_task_instruction(instr, replacements)
75 |     step['observation']['natural_language_instruction'] = instr
76 | 
77 |     return step
78 | 
79 | 
80 | if __name__ == "__main__":
81 |     # Load the dataset via the local TFDS builder defined above
82 |     dataset = load_dataset().take(1)
83 | 
84 |     # Inspect the dataset
85 |     for episode in dataset:
86 |         for step in episode['steps']:
87 |             print(step)
88 |         break
89 | 
--------------------------------------------------------------------------------
/data/preprocess_scripts/roboturk.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | from data.utils import clean_task_instruction, 
quaternion_to_euler, euler_to_quaternion 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 18 | """ 19 | # Convert raw action to our action 20 | action = step['action'] 21 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 22 | 23 | eef_delta_pos = action['world_vector'] 24 | eef_ang = action['rotation_delta'] 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | 27 | grip_open = tf.where(action['gripper_closedness_action']<0,tf.constant(1.0),tf.constant(0.0)) 28 | 29 | # No base found 30 | 31 | # Concatenate the action 32 | arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 33 | action['arm_concat'] = arm_action 34 | 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 38 | 39 | # No state found 40 | 41 | # Clean the task instruction 42 | # Define the replacements (old, new) as a dictionary 43 | replacements = { 44 | '_': ' ', 45 | '1f': ' ', 46 | '4f': ' ', 47 | '-': ' ', 48 | '50': ' ', 49 | '55': ' ', 50 | '56': ' ', 51 | 52 | } 53 | instr = step['observation']['natural_language_instruction'] 54 | instr = clean_task_instruction(instr, replacements) 55 | step['observation']['natural_language_instruction'] = instr 56 | 57 | return step 58 | 59 | 60 | if __name__ == "__main__": 61 | import tensorflow_datasets as tfds 62 | from data.utils import dataset_to_path 63 | 64 | DATASET_DIR = 'data/datasets/openx_embod' 65 | DATASET_NAME = 'roboturk' 66 | # Load the dataset 67 | dataset = tfds.builder_from_directory( 68 | builder_dir=dataset_to_path( 69 | DATASET_NAME, DATASET_DIR)) 70 | dataset = dataset.as_dataset(split='all').take(1) 71 | 72 | # Inspect the dataset 73 | ze=tf.constant(0.0) 74 | for episode in dataset: 75 | for step in episode['steps']: 76 | print(step) 77 | break 78 | -------------------------------------------------------------------------------- /data/preprocess_scripts/stanford_kuka_multimodal_dataset_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d 4 | 5 | def process_step(step: dict) -> dict: 6 | """ 7 | Unify the action format and clean the task instruction. 8 | 9 | DO NOT use python list, use tf.TensorArray instead. 
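    Note: only the 3-dim position delta is kept as the action; the raw
    gripper channel (roughly in [-0.15, 0.15]) is ignored, as noted below.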
10 | """ 11 | # Convert raw action to our action 12 | 13 | origin_action = step['action'] 14 | step['action']={} 15 | action=step['action'] 16 | action['terminate'] = step['is_terminal'] 17 | 18 | eef_delta_pos = origin_action[:3] 19 | 20 | # Ignore grip_open: -0.15~0.15 21 | 22 | # No base found 23 | 24 | # Concatenate the action 25 | action['arm_concat'] = eef_delta_pos 26 | 27 | # Write the action format 28 | action['format'] = tf.constant( 29 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z") 30 | 31 | # Convert raw state to our state 32 | state = step['observation'] 33 | eef_quat = state['ee_orientation'] 34 | eef_ang = quaternion_to_rotation_matrix(eef_quat) 35 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 36 | eef_ang_vel = state['ee_orientation_vel'] 37 | eef_pos = state['ee_position'] 38 | eef_vel = state['ee_vel'] 39 | joint_pos = state['joint_pos'] 40 | joint_vel = state['joint_vel'] 41 | gripper_open = state['state'][7:8] 42 | 43 | # Concatenate the state 44 | state['arm_concat'] = tf.concat([joint_pos,gripper_open,joint_vel,eef_pos,eef_ang,eef_vel,eef_ang_vel],axis=0) 45 | 46 | # Write the state format 47 | state['format'] = tf.constant( 48 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw") 49 | 50 | # Clean the task instruction 51 | # Define the replacements (old, new) as a dictionary 52 | replacements = { 53 | '_': ' ', 54 | '1f': ' ', 55 | '4f': ' ', 56 | '-': ' ', 57 | '50': ' ', 58 | '55': ' ', 59 | '56': ' ', 60 | 61 | } 62 | instr = step['language_instruction'] 63 | instr = clean_task_instruction(instr, replacements) 64 | step['observation']['natural_language_instruction'] = instr 65 | 66 | return step 67 | 68 | 69 | if __name__ == "__main__": 70 | import tensorflow_datasets as tfds 71 | from data.utils import dataset_to_path 72 | 73 | DATASET_DIR = 'data/datasets/openx_embod' 74 | DATASET_NAME = 'stanford_kuka_multimodal_dataset_converted_externally_to_rlds' 75 | # Load the dataset 76 | dataset = tfds.builder_from_directory( 77 | builder_dir=dataset_to_path( 78 | DATASET_NAME, DATASET_DIR)) 79 | dataset = dataset.as_dataset(split='all') 80 | 81 | # Inspect the dataset 82 | for episode in dataset: 83 | for step in episode['steps']: 84 | print(step) 85 | -------------------------------------------------------------------------------- /data/preprocess_scripts/stanford_robocook_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 
19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | # Robot action, consists of [3x robot end-effector velocities, 3x robot end-effector angular velocities, 1x gripper velocity]. 23 | eef_delta_pos = action[:3] 24 | eef_ang = action[3:6] 25 | grip_vel = tf.expand_dims(action[6], axis=0) 26 | 27 | # Concatenate the action 28 | # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 29 | step['action'] = {} 30 | action = step['action'] 31 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_vel], axis=0) 32 | action['terminate'] = step['is_terminal'] 33 | 34 | # Write the action format 35 | action['format'] = tf.constant( 36 | "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_joint_0_vel") 37 | 38 | # Convert raw state to our state 39 | state = step['observation'] 40 | state_vec = state['state'] 41 | # Robot state, consists of [3x robot end-effector position, 3x robot end-effector euler angles, 1x gripper position]. 42 | eef_pos = state_vec[:3] 43 | eef_ang = state_vec[3:6] 44 | eef_ang = euler_to_rotation_matrix(eef_ang) 45 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 46 | grip_joint_pos = state_vec[6:7] * 13.03 # rescale to [0, 1] 47 | 48 | # Concatenate the state 49 | state['arm_concat'] = tf.concat([grip_joint_pos,eef_pos, eef_ang], axis=0) 50 | 51 | # Write the state format 52 | state['format'] = tf.constant( 53 | "gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 54 | 55 | # Clean the task instruction 56 | # Define the replacements (old, new) as a dictionary 57 | replacements = { 58 | '_': ' ', 59 | '1f': ' ', 60 | '4f': ' ', 61 | '-': ' ', 62 | '50': ' ', 63 | '55': ' ', 64 | '56': ' ', 65 | 66 | } 67 | instr = step['language_instruction'] 68 | instr = clean_task_instruction(instr, replacements) 69 | step['observation']['natural_language_instruction'] = instr 70 | 71 | return step 72 | 73 | 74 | if __name__ == "__main__": 75 | import tensorflow_datasets as tfds 76 | from data.utils import dataset_to_path 77 | 78 | DATASET_DIR = 'data/datasets/openx_embod' 79 | DATASET_NAME = 'fractal20220817_data' 80 | # Load the dataset 81 | dataset = tfds.builder_from_directory( 82 | builder_dir=dataset_to_path( 83 | DATASET_NAME, DATASET_DIR)) 84 | dataset = dataset.as_dataset(split='all') 85 | 86 | # Inspect the dataset 87 | for episode in dataset: 88 | for step in episode['steps']: 89 | print(step) 90 | -------------------------------------------------------------------------------- /data/preprocess_scripts/tokyo_u_lsmo_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | # Robot action, consists of [3x endeffector position, 3x euler angles,1x gripper action]. 
23 | eef_delta_pos = action[:3] 24 | eef_ang = action[3:6] 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | grip_open = tf.expand_dims(1 - action[6], axis=0) 27 | 28 | # Concatenate the action 29 | step['action'] = {} 30 | action = step['action'] 31 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 32 | action['terminate'] = step['is_terminal'] 33 | 34 | # Write the action format 35 | action['format'] = tf.constant( 36 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 37 | 38 | # Convert raw state to our state 39 | state = step['observation'] 40 | state_vec = state['state'] 41 | # Robot state, consists of [3x endeffector position, 3x euler angles,6x robot joint angles, 1x gripper position]. 42 | eef_pos = state_vec[:3] 43 | eef_ang = state_vec[3:6] 44 | eef_ang = euler_to_rotation_matrix(eef_ang) 45 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 46 | arm_joint_ang = state_vec[6:12] 47 | grip_joint_pos = 1 - state_vec[12:13] 48 | 49 | # Concatenate the state 50 | state['arm_concat'] = tf.concat([arm_joint_ang, grip_joint_pos, eef_pos, eef_ang], axis=0) 51 | 52 | # Write the state format 53 | state['format'] = tf.constant( 54 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 55 | 56 | # Clean the task instruction 57 | # Define the replacements (old, new) as a dictionary 58 | replacements = { 59 | '_': ' ', 60 | '1f': ' ', 61 | '4f': ' ', 62 | '-': ' ', 63 | '50': ' ', 64 | '55': ' ', 65 | '56': ' ', 66 | 67 | } 68 | instr = step['language_instruction'] 69 | instr = clean_task_instruction(instr, replacements) 70 | step['observation']['natural_language_instruction'] = instr 71 | 72 | return step 73 | 74 | 75 | if __name__ == "__main__": 76 | import tensorflow_datasets as tfds 77 | from data.utils import dataset_to_path 78 | 79 | DATASET_DIR = 'data/datasets/openx_embod' 80 | DATASET_NAME = 'fractal20220817_data' 81 | # Load the dataset 82 | dataset = tfds.builder_from_directory( 83 | builder_dir=dataset_to_path( 84 | DATASET_NAME, DATASET_DIR)) 85 | dataset = dataset.as_dataset(split='all') 86 | 87 | # Inspect the dataset 88 | for episode in dataset: 89 | for step in episode['steps']: 90 | print(step) 91 | -------------------------------------------------------------------------------- /data/preprocess_scripts/toto.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 
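    Note: the raw open_gripper flag is boolean; it is mapped to
    1.0 (open) / 0.0 (closed) and reshaped to a length-1 tensor before
    being concatenated with the position and quaternion deltas.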
18 | """ 19 | # Convert raw action to our action 20 | action = step['action'] 21 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 22 | eef_delta_pos = action['world_vector'] 23 | eef_ang = action['rotation_delta'] 24 | eef_ang = euler_to_quaternion(eef_ang) 25 | grip_open = tf.reshape(tf.where(action['open_gripper'],tf.constant(1.0),tf.constant(0.0)),(1,)) 26 | 27 | # No base found 28 | 29 | # Concatenate the action 30 | arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 31 | action['arm_concat'] = arm_action 32 | 33 | # Write the action format 34 | action['format'] = tf.constant( 35 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 36 | 37 | # Convert raw state to our state 38 | state = step['observation'] 39 | joint_pos=state['state'] 40 | 41 | # Concatenate the state 42 | state['arm_concat'] = joint_pos 43 | 44 | # Write the state format 45 | state['format'] = tf.constant( 46 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos") 47 | 48 | # Clean the task instruction 49 | # Define the replacements (old, new) as a dictionary 50 | replacements = { 51 | '_': ' ', 52 | '1f': ' ', 53 | '4f': ' ', 54 | '-': ' ', 55 | '50': ' ', 56 | '55': ' ', 57 | '56': ' ', 58 | 59 | } 60 | instr = step['observation']['natural_language_instruction'] 61 | instr = clean_task_instruction(instr, replacements) 62 | step['observation']['natural_language_instruction'] = instr 63 | 64 | return step 65 | 66 | 67 | if __name__ == "__main__": 68 | import tensorflow_datasets as tfds 69 | from data.utils import dataset_to_path 70 | 71 | DATASET_DIR = 'data/datasets/openx_embod' 72 | DATASET_NAME = 'toto' 73 | # Load the dataset 74 | dataset = tfds.builder_from_directory( 75 | builder_dir=dataset_to_path( 76 | DATASET_NAME, DATASET_DIR)) 77 | dataset = dataset.as_dataset(split='all') 78 | 79 | # Inspect the dataset 80 | for episode in dataset: 81 | for step in episode['steps']: 82 | print(step) 83 | -------------------------------------------------------------------------------- /data/preprocess_scripts/ucsd_kitchen_dataset_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 18 | """ 19 | 20 | # Convert raw action to our action 21 | 22 | # 8-dimensional action, consisting of end-effector position and orientation, gripper open/close and a episode termination action. 
23 | action = step['action'] 24 | eef_delta_pos = action[:3] 25 | eef_ang = action[3:6] 26 | eef_ang = euler_to_quaternion(eef_ang) 27 | grip_open = tf.expand_dims(1 - action[6], axis=0) 28 | 29 | # Concatenate the action 30 | # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 31 | step['action'] = {} 32 | action = step['action'] 33 | 34 | action['terminate'] = step['is_terminal'] 35 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 36 | 37 | # Write the action format 38 | action['format'] = tf.constant( 39 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 40 | 41 | # Convert raw state to our state 42 | state = step['observation']['state'] 43 | # 21-dimensional joint states, consists of robot joint angles, joint velocity and joint torque. 44 | arm_joint_pos = state[:7] 45 | arm_joint_vel = state[7:14] 46 | # arm_joint_torque = state[14:21] 47 | 48 | # Concatenate the state 49 | 50 | state = step['observation'] 51 | state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel], axis=0) 52 | 53 | # Write the state format 54 | state['format'] = tf.constant( 55 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel") 56 | 57 | # Clean the task instruction 58 | # Define the replacements (old, new) as a dictionary 59 | replacements = { 60 | '_': ' ', 61 | '1f': ' ', 62 | '4f': ' ', 63 | '-': ' ', 64 | '50': ' ', 65 | '55': ' ', 66 | '56': ' ', 67 | 68 | } 69 | instr = step['language_instruction'] 70 | instr = clean_task_instruction(instr, replacements) 71 | step['observation']['natural_language_instruction'] = instr 72 | 73 | return step 74 | 75 | 76 | if __name__ == "__main__": 77 | import tensorflow_datasets as tfds 78 | from data.utils import dataset_to_path 79 | 80 | DATASET_DIR = 'data/datasets/openx_embod' 81 | DATASET_NAME = 'fractal20220817_data' 82 | # Load the dataset 83 | dataset = tfds.builder_from_directory( 84 | builder_dir=dataset_to_path( 85 | DATASET_NAME, DATASET_DIR)) 86 | dataset = dataset.as_dataset(split='all') 87 | 88 | # Inspect the dataset 89 | for episode in dataset: 90 | for step in episode['steps']: 91 | print(step) 92 | -------------------------------------------------------------------------------- /data/preprocess_scripts/ucsd_pick_and_place_dataset_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 19 | """ 20 | 21 | # Convert raw action to our action 22 | action = step['action'] 23 | # Robot action, consists of [3x gripper velocities,1x gripper open/close torque]. 
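    # The torque channel is mapped to an open signal via 1 - x; further
    # down, raw positions are divided by 10 (dm -> m) and the finger
    # distance by 6.03 to normalize it to roughly [0, 1].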
24 | eef_vel = action[:3] 25 | grip_open = tf.expand_dims(1 - action[3], axis=0) 26 | 27 | # Concatenate the action 28 | # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 29 | step['action'] = {} 30 | action = step['action'] 31 | 32 | action['terminate'] = step['is_terminal'] 33 | action['arm_concat'] = tf.concat([eef_vel,grip_open], axis=0) 34 | # Write the action format 35 | action['format'] = tf.constant( 36 | "eef_vel_x,eef_vel_y,eef_vel_z,gripper_open") 37 | 38 | # Convert raw state to our state 39 | state = step['observation']['state'] 40 | # Robot state, consists of [3x gripper position,3x gripper orientation, 1x finger distance]. 41 | gripper_pos = state[:3]/10 # dm -> m 42 | gripper_ang = state[3:6] 43 | gripper_ang = euler_to_rotation_matrix(gripper_ang) 44 | gripper_ang = rotation_matrix_to_ortho6d(gripper_ang) 45 | gripper_open = state[6:7]/6.03 # 1 for open 46 | 47 | 48 | # Concatenate the state 49 | state = step['observation'] 50 | state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0) 51 | 52 | # Write the state format 53 | state['format'] = tf.constant( 54 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 55 | 56 | # Clean the task instruction 57 | # Define the replacements (old, new) as a dictionary 58 | replacements = { 59 | '_': ' ', 60 | '1f': ' ', 61 | '4f': ' ', 62 | '-': ' ', 63 | '50': ' ', 64 | '55': ' ', 65 | '56': ' ', 66 | 67 | } 68 | instr = step['language_instruction'] 69 | instr = clean_task_instruction(instr, replacements) 70 | step['observation']['natural_language_instruction'] = instr 71 | 72 | return step 73 | 74 | 75 | if __name__ == "__main__": 76 | import tensorflow_datasets as tfds 77 | from data.utils import dataset_to_path 78 | 79 | DATASET_DIR = 'data/datasets/openx_embod' 80 | DATASET_NAME = 'fractal20220817_data' 81 | # Load the dataset 82 | dataset = tfds.builder_from_directory( 83 | builder_dir=dataset_to_path( 84 | DATASET_NAME, DATASET_DIR)) 85 | dataset = dataset.as_dataset(split='all') 86 | 87 | # Inspect the dataset 88 | for episode in dataset: 89 | for step in episode['steps']: 90 | print(step) 91 | -------------------------------------------------------------------------------- /data/preprocess_scripts/uiuc_d3field.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, rotation_matrix_to_ortho6d 4 | 5 | def process_step(step: dict) -> dict: 6 | """ 7 | Unify the action format and clean the task instruction. 8 | 9 | DO NOT use python list, use tf.TensorArray instead. 
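    Note: the raw state is a 4x4 homogeneous end-effector transform; the
    position is read from its last column and the 3x3 rotation block is
    compressed to the 6D representation.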
10 | """ 11 | # Convert raw action to our action 12 | 13 | origin_action = step['action'] 14 | step['action']={} 15 | action=step['action'] 16 | action['terminate'] = step['is_terminal'] 17 | # Robot displacement from last frame 18 | eef_delta_pos = origin_action[:3] 19 | 20 | # No base found 21 | 22 | # Concatenate the action 23 | action['arm_concat'] = eef_delta_pos 24 | 25 | # Write the action format 26 | action['format'] = tf.constant( 27 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z") 28 | 29 | # Convert raw state to our state 30 | state = step['observation'] 31 | # Concatenate the state 32 | # 4x4 Robot end-effector state 33 | eef_mat=state['state'] 34 | eef_pos=eef_mat[:3,3] 35 | rotation_matrix=eef_mat[:3,:3] 36 | eef_ang = rotation_matrix_to_ortho6d(rotation_matrix) 37 | state['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) 38 | 39 | # Write the state format 40 | state['format'] = tf.constant( 41 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 42 | 43 | # Clean the task instruction 44 | # Define the replacements (old, new) as a dictionary 45 | replacements = { 46 | '_': ' ', 47 | '1f': ' ', 48 | '4f': ' ', 49 | '-': ' ', 50 | '50': ' ', 51 | '55': ' ', 52 | '56': ' ', 53 | 54 | } 55 | instr = step['language_instruction'] 56 | instr = clean_task_instruction(instr, replacements) 57 | step['observation']['natural_language_instruction'] = instr 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'uiuc_d3field' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset: 76 | for step in episode['steps']: 77 | print(step) 78 | -------------------------------------------------------------------------------- /data/preprocess_scripts/usc_cloth_sim_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 18 | """ 19 | 20 | # Convert raw action to our action 21 | # Robot action, consists of x,y,z goal and picker commandpicker<0.5 = open, picker>0.5 = close. 
22 |     action = step['action']
23 |     eef_delta_pos = action[:3]
24 |     grip_open = tf.cast(tf.expand_dims(action[3] < 0.5, axis=0), dtype=tf.float32)
25 | 
26 |     # Concatenate the action
27 |     step['action'] = {}
28 |     action = step['action']
29 | 
30 |     action['terminate'] = step['is_terminal']
31 |     action['arm_concat'] = tf.concat([eef_delta_pos, grip_open], axis=0)
32 | 
33 |     # Write the action format
34 |     action['format'] = tf.constant(
35 |         "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,gripper_open")
36 | 
37 |     # State doesn't exist in this dataset
38 | 
39 |     # Clean the task instruction
40 |     # Define the replacements (old, new) as a dictionary
41 |     replacements = {
42 |         '_': ' ',
43 |         '1f': ' ',
44 |         '4f': ' ',
45 |         '-': ' ',
46 |         '50': ' ',
47 |         '55': ' ',
48 |         '56': ' ',
49 | 
50 |     }
51 |     instr = step['language_instruction']
52 |     instr = clean_task_instruction(instr, replacements)
53 |     step['observation']['natural_language_instruction'] = instr
54 | 
55 |     return step
56 | 
57 | 
58 | if __name__ == "__main__":
59 |     import tensorflow_datasets as tfds
60 |     from data.utils import dataset_to_path
61 | 
62 |     DATASET_DIR = 'data/datasets/openx_embod'
63 |     DATASET_NAME = 'usc_cloth_sim_converted_externally_to_rlds'
64 |     # Load the dataset
65 |     dataset = tfds.builder_from_directory(
66 |         builder_dir=dataset_to_path(
67 |             DATASET_NAME, DATASET_DIR))
68 |     dataset = dataset.as_dataset(split='all')
69 | 
70 |     # Inspect the dataset
71 |     for episode in dataset:
72 |         for step in episode['steps']:
73 |             print(step)
74 | 
--------------------------------------------------------------------------------
/data/preprocess_scripts/utaustin_mutex.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | from data.utils import clean_task_instruction, euler_to_quaternion, rotation_matrix_to_ortho6d
4 | 
5 | def process_step(step: dict) -> dict:
6 |     """
7 |     Unify the action format and clean the task instruction.
8 | 
9 |     DO NOT use python list, use tf.TensorArray instead.
10 | """ 11 | # Convert raw action to our action 12 | 13 | origin_action = step['action'] 14 | step['action']={} 15 | action=step['action'] 16 | action['terminate'] = step['is_terminal'] 17 | # 6x end effector delta pose, 1x gripper position 18 | eef_delta_pos = origin_action[:3] 19 | eef_ang=origin_action[3:6] 20 | eef_ang = euler_to_quaternion(eef_ang) 21 | grip_open=origin_action[6:7] 22 | # No base found 23 | 24 | # Concatenate the action 25 | action['arm_concat'] = tf.concat([eef_delta_pos,eef_ang,grip_open],axis=0) 26 | 27 | # Write the action format 28 | action['format'] = tf.constant( 29 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 30 | 31 | # Convert raw state to our state 32 | state = step['observation'] 33 | # Concatenate the state 34 | arm_joint_ang=state['state'][:7] 35 | grip_open=state['state'][7:8] * 13.21 # rescale to [0, 1] 36 | # 4x4 Robot end-effector state 37 | eef_mat = tf.transpose(tf.reshape(state['state'][8:], (4, 4))) 38 | eef_pos = eef_mat[:3, 3] 39 | rotation_matrix = eef_mat[:3, :3] 40 | eef_ang = rotation_matrix_to_ortho6d(rotation_matrix) 41 | state['arm_concat'] = tf.concat([arm_joint_ang,grip_open,eef_pos,eef_ang],axis=0) 42 | 43 | # Write the state format 44 | state['format'] = tf.constant( 45 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 46 | 47 | # Clean the task instruction 48 | # Define the replacements (old, new) as a dictionary 49 | replacements = { 50 | '_': ' ', 51 | '1f': ' ', 52 | '4f': ' ', 53 | '-': ' ', 54 | '50': ' ', 55 | '55': ' ', 56 | '56': ' ', 57 | 58 | } 59 | instr = step['language_instruction'] 60 | instr = clean_task_instruction(instr, replacements) 61 | step['observation']['natural_language_instruction'] = instr 62 | 63 | return step 64 | 65 | 66 | if __name__ == "__main__": 67 | import tensorflow_datasets as tfds 68 | from data.utils import dataset_to_path 69 | 70 | DATASET_DIR = 'data/datasets/openx_embod' 71 | DATASET_NAME = 'utaustin_mutex' 72 | # Load the dataset 73 | dataset = tfds.builder_from_directory( 74 | builder_dir=dataset_to_path( 75 | DATASET_NAME, DATASET_DIR)) 76 | dataset = dataset.as_dataset(split='all') 77 | 78 | # Inspect the dataset 79 | for episode in dataset: 80 | for step in episode['steps']: 81 | print(step) 82 | -------------------------------------------------------------------------------- /data/preprocess_scripts/utokyo_pr2_opening_fridge_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 
19 | """ 20 | 21 | # Convert raw action to our action 22 | action = step['action'] 23 | # Robot action, consists of [3x end effector pos, 3x robot rpy angles, 1x gripper open/close command, 1x terminal action]. 24 | eef_delta_pos = action[:3]/1000 # change from mm to m 25 | eef_ang = action[3:6] 26 | eef_ang = euler_to_quaternion(eef_ang) 27 | grip_open = tf.expand_dims(1 - action[6], axis=0) 28 | 29 | # Concatenate the action 30 | step['action'] = {} 31 | action = step['action'] 32 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 33 | 34 | action['terminate'] = step['is_terminal'] 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 38 | 39 | 40 | # Convert raw state to our state 41 | state = step['observation']['state'] 42 | # Robot state, consists of [3x end effector pos, 3x robot rpy angles, 1x gripper position]. 43 | gripper_pos = state[:3]/1000 # change from mm to m 44 | gripper_ang = state[3:6] 45 | gripper_ang = euler_to_rotation_matrix(gripper_ang) 46 | gripper_ang = rotation_matrix_to_ortho6d(gripper_ang) 47 | gripper_open = state[6:7]/1000 * 11.54 # rescale to [0, 1] 48 | 49 | 50 | # Concatenate the state 51 | state = step['observation'] 52 | state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0) 53 | 54 | # Write the state format 55 | state['format'] = tf.constant( 56 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_joint_0_pos") 57 | 58 | # Clean the task instruction 59 | # Define the replacements (old, new) as a dictionary 60 | replacements = { 61 | '_': ' ', 62 | '1f': ' ', 63 | '4f': ' ', 64 | '-': ' ', 65 | '50': ' ', 66 | '55': ' ', 67 | '56': ' ', 68 | 69 | } 70 | instr = step['language_instruction'] 71 | instr = clean_task_instruction(instr, replacements) 72 | step['observation']['natural_language_instruction'] = instr 73 | 74 | return step 75 | 76 | 77 | if __name__ == "__main__": 78 | import tensorflow_datasets as tfds 79 | from data.utils import dataset_to_path 80 | 81 | DATASET_DIR = 'data/datasets/openx_embod' 82 | DATASET_NAME = 'fractal20220817_data' 83 | # Load the dataset 84 | dataset = tfds.builder_from_directory( 85 | builder_dir=dataset_to_path( 86 | DATASET_NAME, DATASET_DIR)) 87 | dataset = dataset.as_dataset(split='all') 88 | 89 | # Inspect the dataset 90 | for episode in dataset: 91 | for step in episode['steps']: 92 | print(step) 93 | -------------------------------------------------------------------------------- /data/preprocess_scripts/utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 
19 | """ 20 | 21 | # Convert raw action to our action 22 | action = step['action'] 23 | # Robot action, consists of [3x end effector pos, 3x robot rpy angles, 1x gripper open/close command, 1x terminal action]. 24 | eef_delta_pos = action[:3]/1000 # change from mm to m 25 | eef_ang = action[3:6] 26 | eef_ang = euler_to_quaternion(eef_ang) 27 | grip_open = tf.expand_dims(1 - action[6], axis=0) 28 | 29 | # Concatenate the action 30 | step['action'] = {} 31 | action = step['action'] 32 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 33 | 34 | action['terminate'] = step['is_terminal'] 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 38 | 39 | 40 | # Convert raw state to our state 41 | state = step['observation']['state'] 42 | # Robot state, consists of [3x end effector pos, 3x robot rpy angles, 1x gripper position]. 43 | gripper_pos = state[:3]/1000 # change from mm to m 44 | gripper_ang = state[3:6] 45 | gripper_ang = euler_to_rotation_matrix(gripper_ang) 46 | gripper_ang = rotation_matrix_to_ortho6d(gripper_ang) 47 | gripper_open = state[6:7] / 86.12126922607422 # rescale to [0, 1] 48 | 49 | # Concatenate the state 50 | state = step['observation'] 51 | state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0) 52 | 53 | # Write the state format 54 | state['format'] = tf.constant( 55 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_joint_0_pos") 56 | 57 | # Clean the task instruction 58 | # Define the replacements (old, new) as a dictionary 59 | replacements = { 60 | '_': ' ', 61 | '1f': ' ', 62 | '4f': ' ', 63 | '-': ' ', 64 | '50': ' ', 65 | '55': ' ', 66 | '56': ' ', 67 | 68 | } 69 | instr = step['language_instruction'] 70 | instr = clean_task_instruction(instr, replacements) 71 | step['observation']['natural_language_instruction'] = instr 72 | 73 | return step 74 | 75 | 76 | if __name__ == "__main__": 77 | import tensorflow_datasets as tfds 78 | from data.utils import dataset_to_path 79 | 80 | DATASET_DIR = 'data/datasets/openx_embod' 81 | DATASET_NAME = 'fractal20220817_data' 82 | # Load the dataset 83 | dataset = tfds.builder_from_directory( 84 | builder_dir=dataset_to_path( 85 | DATASET_NAME, DATASET_DIR)) 86 | dataset = dataset.as_dataset(split='all') 87 | 88 | # Inspect the dataset 89 | for episode in dataset: 90 | for step in episode['steps']: 91 | print(step) 92 | -------------------------------------------------------------------------------- /data/preprocess_scripts/viola.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, rotation_matrix_to_ortho6d 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 
18 | """ 19 | # Convert raw action to our action 20 | action = step['action'] 21 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 22 | 23 | eef_delta_pos = action['world_vector'] 24 | eef_ang = action['rotation_delta'] 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | grip_open = tf.reshape(tf.where(action['gripper_closedness_action']<0,tf.constant(1.0),tf.constant(0.0)),(1,)) 27 | 28 | # No base found 29 | 30 | # Concatenate the action 31 | arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 32 | action['arm_concat'] = arm_action 33 | 34 | # Write the action format 35 | action['format'] = tf.constant( 36 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 37 | 38 | # Convert raw state to our state 39 | state = step['observation'] 40 | joint_pos=state['joint_states'] 41 | grip_open=state['gripper_states'] * 12.905 # rescale to [0, 1] 42 | state_ee=state['ee_states'] 43 | transform_matrix = tf.transpose(tf.reshape(state_ee, (4, 4))) 44 | eef_pos = transform_matrix[:3, 3] 45 | rotation_matrix = transform_matrix[:3, :3] 46 | eef_ang = rotation_matrix_to_ortho6d(rotation_matrix) 47 | 48 | # Concatenate the state 49 | state['arm_concat'] = tf.concat([joint_pos,grip_open,eef_pos,eef_ang],axis=0) 50 | 51 | # Write the state format 52 | state['format'] = tf.constant( 53 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 54 | 55 | # Clean the task instruction 56 | # Define the replacements (old, new) as a dictionary 57 | replacements = { 58 | '_': ' ', 59 | '1f': ' ', 60 | '4f': ' ', 61 | '-': ' ', 62 | '50': ' ', 63 | '55': ' ', 64 | '56': ' ', 65 | 66 | } 67 | instr = step['observation']['natural_language_instruction'] 68 | instr = clean_task_instruction(instr, replacements) 69 | step['observation']['natural_language_instruction'] = instr 70 | 71 | return step 72 | 73 | 74 | if __name__ == "__main__": 75 | import tensorflow_datasets as tfds 76 | from data.utils import dataset_to_path 77 | 78 | DATASET_DIR = 'data/datasets/openx_embod' 79 | DATASET_NAME = 'viola' 80 | # Load the dataset 81 | dataset = tfds.builder_from_directory( 82 | builder_dir=dataset_to_path( 83 | DATASET_NAME, DATASET_DIR)) 84 | dataset = dataset.as_dataset(split='all') 85 | 86 | # Inspect the dataset 87 | for episode in dataset: 88 | for step in episode['steps']: 89 | print(step) 90 | -------------------------------------------------------------------------------- /data/roboset/download.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import os 3 | from tqdm import tqdm 4 | 5 | links = [] 6 | with open('links.txt', 'r', encoding='utf-8') as file: 7 | for line in file: 8 | links.append(line.strip()) 9 | 10 | download_dir = "../datasets/roboset" 11 | os.makedirs(download_dir, exist_ok=True) 12 | 13 | for link in links: 14 | filename = os.path.basename(link) 15 | filepath = os.path.join(download_dir, filename) 16 | print(f"Downloading {filename} from {link}") 17 | 18 | response = requests.get(link, stream=True) 19 | total_size_in_bytes = int(response.headers.get('content-length', 0)) 20 | block_size = 1024 21 | 22 | if os.path.exists(filepath): 23 | local_size = os.path.getsize(filepath) 24 | if local_size == total_size_in_bytes: 25 | print(f"{filename} already 
exists") 26 | continue 27 | 28 | progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) 29 | 30 | with open(filepath, 'wb') as f: 31 | for data in response.iter_content(block_size): 32 | progress_bar.update(len(data)) 33 | f.write(data) 34 | 35 | progress_bar.close() 36 | 37 | if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: 38 | print("ERROR, something went wrong") 39 | 40 | print(f"Downloaded {filename}") 41 | 42 | print("All files processed.") 43 | -------------------------------------------------------------------------------- /data/roboset/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | while true; do 4 | python download.py 5 | EXIT_CODE=$? 6 | if [ $EXIT_CODE -ne 0 ]; then 7 | echo "Download exited with code $EXIT_CODE. Restarting..." 8 | else 9 | echo "Download exited with code 0. Not restarting." 10 | break 11 | fi 12 | done 13 | 14 | # Unzip all the files in the ../datasets/roboset/ directory 15 | cd ../datasets/roboset/ 16 | for file in *.tar.gz; do 17 | tar -xzvf "$file" 18 | done 19 | 20 | ## Convert the dataset to tfrecords 21 | python hdf5totfrecords.py -------------------------------------------------------------------------------- /docs/test_6drot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.spatial.transform import Rotation as R 3 | 4 | 5 | def convert_quaternion_to_euler(quat): 6 | """ 7 | Convert Quarternion (xyzw) to Euler angles (rpy) 8 | """ 9 | # Normalize 10 | quat = quat / np.linalg.norm(quat) 11 | euler = R.from_quat(quat).as_euler('xyz') 12 | 13 | return euler 14 | 15 | 16 | def convert_euler_to_quaternion(euler): 17 | """ 18 | Convert Euler angles (rpy) to Quarternion (xyzw) 19 | """ 20 | quat = R.from_euler('xyz', euler).as_quat() 21 | 22 | return quat 23 | 24 | 25 | def convert_euler_to_rotation_matrix(euler): 26 | """ 27 | Convert Euler angles (rpy) to rotation matrix (3x3). 28 | """ 29 | quat = R.from_euler('xyz', euler).as_matrix() 30 | 31 | return quat 32 | 33 | 34 | def convert_rotation_matrix_to_euler(rotmat): 35 | """ 36 | Convert rotation matrix (3x3) to Euler angles (rpy). 
37 | """ 38 | r = R.from_matrix(rotmat) 39 | euler = r.as_euler('xyz', degrees=False) 40 | 41 | return euler 42 | 43 | 44 | def normalize_vector(v): 45 | v_mag = np.linalg.norm(v, axis=-1, keepdims=True) 46 | v_mag = np.maximum(v_mag, 1e-8) 47 | return v / v_mag 48 | 49 | 50 | def cross_product(u, v): 51 | i = u[:,1]*v[:,2] - u[:,2]*v[:,1] 52 | j = u[:,2]*v[:,0] - u[:,0]*v[:,2] 53 | k = u[:,0]*v[:,1] - u[:,1]*v[:,0] 54 | 55 | out = np.stack((i, j, k), axis=1) 56 | return out 57 | 58 | 59 | def compute_rotation_matrix_from_ortho6d(ortho6d): 60 | x_raw = ortho6d[:, 0:3] 61 | y_raw = ortho6d[:, 3:6] 62 | 63 | x = normalize_vector(x_raw) 64 | z = cross_product(x, y_raw) 65 | z = normalize_vector(z) 66 | y = cross_product(z, x) 67 | 68 | x = x.reshape(-1, 3, 1) 69 | y = y.reshape(-1, 3, 1) 70 | z = z.reshape(-1, 3, 1) 71 | matrix = np.concatenate((x, y, z), axis=2) 72 | return matrix 73 | 74 | 75 | def compute_ortho6d_from_rotation_matrix(matrix): 76 | # The ortho6d represents the first two column vectors a1 and a2 of the 77 | # rotation matrix: [ | , |, | ] 78 | # [ a1, a2, a3] 79 | # [ | , |, | ] 80 | ortho6d = matrix[:, :, :2].transpose(0, 2, 1).reshape(matrix.shape[0], -1) 81 | return ortho6d 82 | 83 | 84 | # Test 85 | if __name__ == "__main__": 86 | # Randomly generate a euler ange 87 | euler = np.random.rand(3) * 2 * np.pi - np.pi 88 | euler = euler[None, :] # Add batch dimension 89 | print(f"Input Euler angles: {euler}") 90 | 91 | # Convert to 6D Rotation 92 | rotmat = convert_euler_to_rotation_matrix(euler) 93 | ortho6d = compute_ortho6d_from_rotation_matrix(rotmat) 94 | print(f"6D Rotation: {ortho6d}") 95 | 96 | # Convert back to Euler angles 97 | rotmat_recovered = compute_rotation_matrix_from_ortho6d(ortho6d) 98 | euler_recovered = convert_rotation_matrix_to_euler(rotmat_recovered) 99 | print(f"Recovered Euler angles: {euler_recovered}") 100 | -------------------------------------------------------------------------------- /finetune.sh: -------------------------------------------------------------------------------- 1 | export NCCL_IB_HCA=mlx5_0:1,mlx5_1:1,mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_7:1,mlx5_8:1,mlx5_9:1 2 | export NCCL_IB_DISABLE=0 3 | export NCCL_SOCKET_IFNAME=bond0 4 | export NCCL_DEBUG=INFO 5 | export NCCL_NVLS_ENABLE=0 6 | 7 | export TEXT_ENCODER_NAME="google/t5-v1_1-xxl" 8 | export VISION_ENCODER_NAME="google/siglip-so400m-patch14-384" 9 | export OUTPUT_DIR="./checkpoints/rdt-finetune-1b" 10 | export CFLAGS="-I/usr/include" 11 | export LDFLAGS="-L/usr/lib/x86_64-linux-gnu" 12 | export CUTLASS_PATH="/path/to/cutlass" 13 | 14 | export WANDB_PROJECT="robotics_diffusion_transformer" 15 | 16 | if [ ! -d "$OUTPUT_DIR" ]; then 17 | mkdir "$OUTPUT_DIR" 18 | echo "Folder '$OUTPUT_DIR' created" 19 | else 20 | echo "Folder '$OUTPUT_DIR' already exists" 21 | fi 22 | 23 | # For run in a single node/machine 24 | # accelerate launch main.py \ 25 | # --deepspeed="./configs/zero2.json" \ 26 | # ... 
27 | 
28 | deepspeed --hostfile=hostfile.txt main.py \
29 |     --deepspeed="./configs/zero2.json" \
30 |     --pretrained_model_name_or_path="robotics-diffusion-transformer/rdt-1b" \
31 |     --pretrained_text_encoder_name_or_path=$TEXT_ENCODER_NAME \
32 |     --pretrained_vision_encoder_name_or_path=$VISION_ENCODER_NAME \
33 |     --output_dir=$OUTPUT_DIR \
34 |     --train_batch_size=32 \
35 |     --sample_batch_size=64 \
36 |     --max_train_steps=200000 \
37 |     --checkpointing_period=1000 \
38 |     --sample_period=500 \
39 |     --checkpoints_total_limit=40 \
40 |     --lr_scheduler="constant" \
41 |     --learning_rate=1e-4 \
42 |     --mixed_precision="bf16" \
43 |     --dataloader_num_workers=8 \
44 |     --image_aug \
45 |     --dataset_type="finetune" \
46 |     --state_noise_snr=40 \
47 |     --load_from_hdf5 \
48 |     --report_to=wandb
49 | 
50 | # Use this to resume training from some previous checkpoint
51 | # --resume_from_checkpoint="checkpoint-36000" \
52 | # Use this to load from saved language instruction embeddings,
53 | # instead of calculating them during training
54 | # --precomp_lang_embed \
55 | 
--------------------------------------------------------------------------------
/finetune_maniskill.sh:
--------------------------------------------------------------------------------
1 | export NCCL_IB_HCA=mlx5_0:1,mlx5_1:1,mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_7:1,mlx5_8:1,mlx5_9:1
2 | export NCCL_IB_DISABLE=0
3 | export NCCL_SOCKET_IFNAME=bond0
4 | export NCCL_DEBUG=INFO
5 | export NCCL_NVLS_ENABLE=0
6 | 
7 | export TEXT_ENCODER_NAME="google/t5-v1_1-xxl"
8 | export VISION_ENCODER_NAME="google/siglip-so400m-patch14-384"
9 | export OUTPUT_DIR="./checkpoints/rdt-finetune-1b-sim"
10 | export CFLAGS="-I/usr/include"
11 | export LDFLAGS="-L/usr/lib/x86_64-linux-gnu"
12 | export CUTLASS_PATH="/data/lingxuan/cutlass"
13 | 
14 | export WANDB_PROJECT="robotic_diffusion_transformer"
15 | 
16 | if [ ! -d "$OUTPUT_DIR" ]; then
17 |     mkdir "$OUTPUT_DIR"
18 |     echo "Folder '$OUTPUT_DIR' created"
19 | else
20 |     echo "Folder '$OUTPUT_DIR' already exists"
21 | fi
22 | # To run on a single node/machine
23 | # accelerate launch main.py \
24 | #     --deepspeed="./configs/zero2.json" \
25 | #     ...
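   | # (This script already uses accelerate launch below; see finetune.sh for the
   | # multi-node deepspeed --hostfile variant.)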
26 | 
27 | accelerate launch main.py \
28 |     --deepspeed="./configs/zero2.json" \
29 |     --pretrained_model_name_or_path="robotics-diffusion-transformer/rdt-1b" \
30 |     --pretrained_text_encoder_name_or_path=$TEXT_ENCODER_NAME \
31 |     --pretrained_vision_encoder_name_or_path=$VISION_ENCODER_NAME \
32 |     --output_dir=$OUTPUT_DIR \
33 |     --train_batch_size=24 \
34 |     --sample_batch_size=32 \
35 |     --max_train_steps=400000 \
36 |     --checkpointing_period=10000 \
37 |     --sample_period=500 \
38 |     --checkpoints_total_limit=40 \
39 |     --lr_scheduler="constant" \
40 |     --learning_rate=1e-4 \
41 |     --mixed_precision="bf16" \
42 |     --dataloader_num_workers=8 \
43 |     --image_aug \
44 |     --dataset_type="finetune" \
45 |     --state_noise_snr=40 \
46 |     --load_from_hdf5 \
47 |     --report_to=wandb
48 | 
49 | 
--------------------------------------------------------------------------------
/inference.sh:
--------------------------------------------------------------------------------
1 | # Set --pretrained_model_name_or_path to your finetuned checkpoint, e.g.,
2 | # checkpoints/rdt-finetune-1b/checkpoint-<STEP> or
3 | # checkpoints/rdt-finetune-1b/checkpoint-<STEP>/pytorch_model/mp_rank_00_model_states.pt
4 | # Set --ctrl_freq to your control frequency
5 | python -m scripts.agilex_inference \
6 |     --use_actions_interpolation \
7 |     --pretrained_model_name_or_path="checkpoints/your_finetuned_ckpt.pt" \
8 |     --lang_embeddings_path="outs/lang_embeddings/your_instr.pt" \
9 |     --ctrl_freq=25
10 | 
--------------------------------------------------------------------------------
/models/ema_model.py:
--------------------------------------------------------------------------------
1 | # Reference: DiffusionPolicy [https://github.com/real-stanford/diffusion_policy]
2 | 
3 | import torch
4 | from torch.nn.modules.batchnorm import _BatchNorm
5 | 
6 | 
7 | class EMAModel:
8 |     """
9 |     Exponential Moving Average of model weights
10 |     """
11 |     def __init__(
12 |         self,
13 |         model,
14 |         update_after_step=0,
15 |         inv_gamma=1.0,
16 |         power=2 / 3,
17 |         min_value=0.0,
18 |         max_value=0.9999
19 |     ):
20 |         """
21 |         @crowsonkb's notes on EMA Warmup:
22 |             If gamma=1 and power=1, implements a simple average. gamma=1, power=2/3 are good values for models you plan
23 |             to train for a million or more steps (reaches decay factor 0.999 at 31.6K steps, 0.9999 at 1M steps),
24 |             gamma=1, power=3/4 for models you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999
25 |             at 215.4k steps).
26 |         Args:
27 |             inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1.
28 |             power (float): Exponential factor of EMA warmup. Default: 2/3.
29 |             min_value (float): The minimum EMA decay rate. Default: 0.
30 |         """
31 | 
32 |         self.averaged_model = model
33 |         self.averaged_model.eval()
34 |         self.averaged_model.requires_grad_(False)
35 | 
36 |         self.update_after_step = update_after_step
37 |         self.inv_gamma = inv_gamma
38 |         self.power = power
39 |         self.min_value = min_value
40 |         self.max_value = max_value
41 | 
42 |         self.decay = 0.0
43 |         self.optimization_step = 0
44 | 
45 |     def get_decay(self, optimization_step):
46 |         """
47 |         Compute the decay factor for the exponential moving average.
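   |         Implements: decay = 1 - (1 + step / inv_gamma) ** (-power),
   |         clipped to [min_value, max_value]; returns 0.0 until update_after_step is passed.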
48 | """ 49 | step = max(0, optimization_step - self.update_after_step - 1) 50 | value = 1 - (1 + step / self.inv_gamma) ** -self.power 51 | 52 | if step <= 0: 53 | return 0.0 54 | 55 | return max(self.min_value, min(value, self.max_value)) 56 | 57 | @torch.no_grad() 58 | def step(self, new_model): 59 | self.decay = self.get_decay(self.optimization_step) 60 | 61 | # old_all_dataptrs = set() 62 | # for param in new_model.parameters(): 63 | # data_ptr = param.data_ptr() 64 | # if data_ptr != 0: 65 | # old_all_dataptrs.add(data_ptr) 66 | 67 | all_dataptrs = set() 68 | for module, ema_module in zip(new_model.modules(), self.averaged_model.modules()): 69 | for param, ema_param in zip(module.parameters(recurse=False), ema_module.parameters(recurse=False)): 70 | # iterative over immediate parameters only. 71 | if isinstance(param, dict): 72 | raise RuntimeError('Dict parameter not supported') 73 | 74 | # data_ptr = param.data_ptr() 75 | # if data_ptr != 0: 76 | # all_dataptrs.add(data_ptr) 77 | 78 | if isinstance(module, _BatchNorm): 79 | # skip batchnorms 80 | ema_param.copy_(param.to(dtype=ema_param.dtype).data) 81 | elif not param.requires_grad: 82 | ema_param.copy_(param.to(dtype=ema_param.dtype).data) 83 | else: 84 | ema_param.mul_(self.decay) 85 | ema_param.add_(param.data.to(dtype=ema_param.dtype), alpha=1 - self.decay) 86 | 87 | # verify that iterating over module and then parameters is identical to parameters recursively. 88 | # assert old_all_dataptrs == all_dataptrs 89 | self.optimization_step += 1 90 | -------------------------------------------------------------------------------- /models/hub_mixin.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import Dict, Optional, Union 4 | 5 | from huggingface_hub import PyTorchModelHubMixin 6 | from huggingface_hub.constants import (PYTORCH_WEIGHTS_NAME, 7 | SAFETENSORS_SINGLE_FILE) 8 | from huggingface_hub.file_download import hf_hub_download 9 | from huggingface_hub.utils import EntryNotFoundError, is_torch_available 10 | 11 | 12 | if is_torch_available(): 13 | import torch # type: ignore 14 | 15 | 16 | class CompatiblePyTorchModelHubMixin(PyTorchModelHubMixin): 17 | """Mixin class to load Pytorch models from the Hub.""" 18 | def _save_pretrained(self, save_directory: Path) -> None: 19 | """Save weights from a Pytorch model to a local directory.""" 20 | # To bypass saving into safetensor by default 21 | model_to_save = self.module if hasattr(self, "module") else self # type: ignore 22 | torch.save(model_to_save.state_dict(), save_directory / PYTORCH_WEIGHTS_NAME) 23 | 24 | @classmethod 25 | def _from_pretrained( 26 | cls, 27 | *, 28 | model_id: str, 29 | revision: Optional[str], 30 | cache_dir: Optional[Union[str, Path]], 31 | force_download: bool, 32 | proxies: Optional[Dict], 33 | resume_download: Optional[bool], 34 | local_files_only: bool, 35 | token: Union[str, bool, None], 36 | map_location: str = "cpu", 37 | strict: bool = False, 38 | **model_kwargs, 39 | ): 40 | """Load Pytorch pretrained weights and return the loaded model.""" 41 | model = cls(**model_kwargs) 42 | if os.path.isdir(model_id): 43 | print("Loading weights from local directory") 44 | try: 45 | model_file = os.path.join(model_id, SAFETENSORS_SINGLE_FILE) 46 | return cls._load_as_safetensor(model, model_file, map_location, strict) 47 | except FileNotFoundError: 48 | model_file = os.path.join(model_id, PYTORCH_WEIGHTS_NAME) 49 | return cls._load_as_pickle(model, model_file, 
map_location, strict) 50 | else: 51 | try: 52 | model_file = hf_hub_download( 53 | repo_id=model_id, 54 | filename=SAFETENSORS_SINGLE_FILE, 55 | revision=revision, 56 | cache_dir=cache_dir, 57 | force_download=force_download, 58 | proxies=proxies, 59 | resume_download=resume_download, 60 | token=token, 61 | local_files_only=local_files_only, 62 | ) 63 | return cls._load_as_safetensor(model, model_file, map_location, strict) 64 | except EntryNotFoundError: 65 | model_file = hf_hub_download( 66 | repo_id=model_id, 67 | filename=PYTORCH_WEIGHTS_NAME, 68 | revision=revision, 69 | cache_dir=cache_dir, 70 | force_download=force_download, 71 | proxies=proxies, 72 | resume_download=resume_download, 73 | token=token, 74 | local_files_only=local_files_only, 75 | ) 76 | return cls._load_as_pickle(model, model_file, map_location, strict) -------------------------------------------------------------------------------- /models/multimodal_encoder/dinov2_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import AutoConfig, AutoImageProcessor, AutoModel, Dinov2Model 4 | 5 | 6 | class DinoV2VisionTower(nn.Module): 7 | def __init__(self, vision_tower, args, delay_load=False): 8 | super().__init__() 9 | 10 | self.is_loaded = False 11 | 12 | self.vision_tower_name = vision_tower 13 | self.select_feature = getattr(args, 'mm_vision_select_feature', 'patch') 14 | 15 | if not delay_load: 16 | self.load_model() 17 | elif getattr(args, 'unfreeze_mm_vision_tower', False): 18 | self.load_model() 19 | else: 20 | self.cfg_only = AutoConfig.from_pretrained(self.vision_tower_name) 21 | 22 | def load_model(self, device_map=None): 23 | if self.is_loaded: 24 | print('{} is already loaded, `load_model` called again, skipping.'.format(self.vision_tower_name)) 25 | return 26 | 27 | self.image_processor = AutoImageProcessor.from_pretrained(self.vision_tower_name) 28 | self.vision_tower = AutoModel.from_pretrained(self.vision_tower_name, device_map=device_map) 29 | self.vision_tower.requires_grad_(False) # FIXME: 30 | 31 | self.is_loaded = True 32 | 33 | def feature_select(self, image_forward_outs): 34 | image_features = image_forward_outs.last_hidden_state 35 | if self.select_feature == 'patch': 36 | image_features = image_features[:, 1:] # (B, 1369, 1536) 37 | elif self.select_feature == 'cls_patch': 38 | image_features = image_features # (B, 1, 1536) 39 | else: 40 | raise ValueError(f'Unexpected select feature: {self.select_feature}') 41 | return image_features 42 | 43 | @torch.no_grad() 44 | def forward(self, images): 45 | if type(images) is list: 46 | image_features = [] 47 | for image in images: 48 | image_forward_out = self.vision_tower(image.to(device=self.device, dtype=self.dtype).unsqueeze(0)) 49 | image_feature = self.feature_select(image_forward_out).to(image.dtype) 50 | image_features.append(image_feature) 51 | else: 52 | image_forward_outs = self.vision_tower(images.to(device=self.device, dtype=self.dtype)) 53 | image_features = self.feature_select(image_forward_outs).to(images.dtype) 54 | 55 | return image_features 56 | 57 | @property 58 | def dummy_feature(self): 59 | return torch.zeros(1, self.hidden_size, device=self.device, dtype=self.dtype) 60 | 61 | @property 62 | def dtype(self): 63 | return self.vision_tower.dtype 64 | 65 | @property 66 | def device(self): 67 | return self.vision_tower.device 68 | 69 | @property 70 | def config(self): 71 | if self.is_loaded: 72 | return self.vision_tower.config 73 | 
else: 74 | return self.cfg_only 75 | 76 | @property 77 | def hidden_size(self): 78 | return self.config.hidden_size 79 | 80 | @property 81 | def num_patches_per_side(self): 82 | return self.config.image_size // self.config.patch_size 83 | 84 | @property 85 | def num_patches(self): 86 | return (self.config.image_size // self.config.patch_size) ** 2 87 | 88 | -------------------------------------------------------------------------------- /models/multimodal_encoder/siglip_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import AutoConfig, SiglipImageProcessor, SiglipVisionModel 4 | 5 | 6 | class SiglipVisionTower(nn.Module): 7 | def __init__(self, vision_tower, args, delay_load=False): 8 | super().__init__() 9 | 10 | self.is_loaded = False 11 | 12 | self.vision_tower_name = vision_tower 13 | self.select_feature = getattr(args, 'mm_vision_select_feature', 'patch') 14 | 15 | if not delay_load: 16 | self.load_model() 17 | elif getattr(args, 'unfreeze_mm_vision_tower', False): 18 | self.load_model() 19 | else: 20 | self.cfg_only = AutoConfig.from_pretrained(self.vision_tower_name) 21 | 22 | def load_model(self, device_map=None): 23 | if self.is_loaded: 24 | print('{} is already loaded, `load_model` called again, skipping.'.format(self.vision_tower_name)) 25 | return 26 | 27 | self.image_processor = SiglipImageProcessor.from_pretrained(self.vision_tower_name) 28 | self.vision_tower = SiglipVisionModel.from_pretrained(self.vision_tower_name, device_map=device_map) 29 | self.vision_tower.eval() 30 | 31 | self.is_loaded = True 32 | 33 | def feature_select(self, image_forward_outs): 34 | if self.select_feature == 'patch': 35 | image_features = image_forward_outs.last_hidden_state # (B, 729, 1536) 36 | elif self.select_feature == 'cls_patch': 37 | image_features = image_forward_outs.pooler_output # (B, 1, 1536) 38 | else: 39 | raise ValueError(f'Unexpected select feature: {self.select_feature}') 40 | return image_features 41 | 42 | @torch.no_grad() 43 | def forward(self, images): 44 | if type(images) is list: 45 | image_features = [] 46 | for image in images: 47 | image_forward_out = self.vision_tower(image.to(device=self.device, dtype=self.dtype).unsqueeze(0)) 48 | image_feature = self.feature_select(image_forward_out).to(image.dtype) 49 | image_features.append(image_feature) 50 | else: 51 | image_forward_outs = self.vision_tower(images.to(device=self.device, dtype=self.dtype)) 52 | image_features = self.feature_select(image_forward_outs).to(images.dtype) 53 | 54 | return image_features 55 | 56 | @property 57 | def dummy_feature(self): 58 | return torch.zeros(1, self.hidden_size, device=self.device, dtype=self.dtype) 59 | 60 | @property 61 | def dtype(self): 62 | return self.vision_tower.dtype 63 | 64 | @property 65 | def device(self): 66 | return self.vision_tower.device 67 | 68 | @property 69 | def config(self): 70 | if self.is_loaded: 71 | return self.vision_tower.config 72 | else: 73 | return self.cfg_only 74 | 75 | @property 76 | def hidden_size(self): 77 | return self.config.hidden_size 78 | 79 | @property 80 | def num_patches_per_side(self): 81 | return self.config.image_size // self.config.patch_size 82 | 83 | @property 84 | def num_patches(self): 85 | return (self.config.image_size // self.config.patch_size) ** 2 86 | 87 | -------------------------------------------------------------------------------- /pretrain.sh: 
--------------------------------------------------------------------------------
1 | export NCCL_IB_HCA=mlx5_0:1,mlx5_1:1,mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_7:1,mlx5_8:1,mlx5_9:1
2 | export NCCL_IB_DISABLE=0
3 | export NCCL_SOCKET_IFNAME=bond0
4 | export NCCL_DEBUG=INFO
5 | export NCCL_NVLS_ENABLE=0
6 | 
7 | export TEXT_ENCODER_NAME="google/t5-v1_1-xxl"
8 | export VISION_ENCODER_NAME="google/siglip-so400m-patch14-384"
9 | export OUTPUT_DIR="./checkpoints/rdt-pretrain-1b"
10 | export CFLAGS="-I/usr/include"
11 | export LDFLAGS="-L/usr/lib/x86_64-linux-gnu"
12 | export CUTLASS_PATH="/path/to/cutlass"
13 | 
14 | export WANDB_PROJECT="robotics_diffusion_transformer"
15 | 
16 | if [ ! -d "$OUTPUT_DIR" ]; then
17 |     mkdir "$OUTPUT_DIR"
18 |     echo "Folder '$OUTPUT_DIR' created"
19 | else
20 |     echo "Folder '$OUTPUT_DIR' already exists"
21 | fi
22 | 
23 | # To run on a single node/machine
24 | # accelerate launch main.py \
25 | #     --deepspeed="./configs/zero2.json" \
26 | #     ...
27 | 
28 | deepspeed --hostfile=hostfile.txt main.py \
29 |     --deepspeed="./configs/zero2.json" \
30 |     --pretrained_text_encoder_name_or_path=$TEXT_ENCODER_NAME \
31 |     --pretrained_vision_encoder_name_or_path=$VISION_ENCODER_NAME \
32 |     --output_dir=$OUTPUT_DIR \
33 |     --train_batch_size=32 \
34 |     --sample_batch_size=64 \
35 |     --max_train_steps=1000000 \
36 |     --checkpointing_period=1000 \
37 |     --sample_period=500 \
38 |     --checkpoints_total_limit=40 \
39 |     --lr_scheduler="constant" \
40 |     --learning_rate=1e-4 \
41 |     --mixed_precision="bf16" \
42 |     --dataloader_num_workers=8 \
43 |     --dataset_type="pretrain" \
44 |     --report_to=wandb
45 | 
46 | # Use this to resume training from some previous checkpoint
47 | # --resume_from_checkpoint="checkpoint-1000" \
48 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | packaging==24.0
2 | wandb==0.17.0
3 | deepspeed==0.14.2
4 | accelerate==0.30.1
5 | diffusers==0.27.2
6 | timm==1.0.3
7 | transformers==4.41.0
8 | sentencepiece==0.2.0
9 | h5py==3.11.0
10 | opencv-python==4.9.0.80
11 | imgaug==0.4.0
--------------------------------------------------------------------------------
/requirements_data.txt:
--------------------------------------------------------------------------------
1 | tfds-nightly==4.9.4.dev202402070044
2 | gsutil==5.27
3 | tensorflow==2.15.0.post1
4 | pillow==10.2.0
5 | pyyaml==6.0.1
6 | opencv-python==4.9.0.80
7 | tensorflow-graphics==2021.12.3
8 | imageio==2.34.0
9 | imageio-ffmpeg==0.4.9
--------------------------------------------------------------------------------
/scripts/encode_lang.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import torch
4 | import yaml
5 | 
6 | from models.multimodal_encoder.t5_encoder import T5Embedder
7 | 
8 | 
9 | GPU = 0
10 | MODEL_PATH = "google/t5-v1_1-xxl"
11 | CONFIG_PATH = "configs/base.yaml"
12 | SAVE_DIR = "outs/"
13 | 
14 | # Modify this to your task name and instruction
15 | TASK_NAME = "handover_pan"
16 | INSTRUCTION = "Pick up the black marker on the right and put it into the packaging box on the left."
17 | 
18 | # Note: if your GPU VRAM is less than 24GB,
19 | # it is recommended to enable offloading by specifying an offload directory.
20 | OFFLOAD_DIR = None  # Specify your offload directory here, ensuring the directory exists.
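   | # e.g., OFFLOAD_DIR = "./offload"  # hypothetical path; create the folder beforehand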
21 | 22 | def main(): 23 | with open(CONFIG_PATH, "r") as fp: 24 | config = yaml.safe_load(fp) 25 | 26 | device = torch.device(f"cuda:{GPU}") 27 | text_embedder = T5Embedder( 28 | from_pretrained=MODEL_PATH, 29 | model_max_length=config["dataset"]["tokenizer_max_length"], 30 | device=device, 31 | use_offload_folder=OFFLOAD_DIR 32 | ) 33 | tokenizer, text_encoder = text_embedder.tokenizer, text_embedder.model 34 | 35 | tokens = tokenizer( 36 | INSTRUCTION, return_tensors="pt", 37 | padding="longest", 38 | truncation=True 39 | )["input_ids"].to(device) 40 | 41 | tokens = tokens.view(1, -1) 42 | with torch.no_grad(): 43 | pred = text_encoder(tokens).last_hidden_state.detach().cpu() 44 | 45 | save_path = os.path.join(SAVE_DIR, f"{TASK_NAME}.pt") 46 | # We save the embeddings in a dictionary format 47 | torch.save({ 48 | "name": TASK_NAME, 49 | "instruction": INSTRUCTION, 50 | "embeddings": pred 51 | }, save_path 52 | ) 53 | 54 | print(f'\"{INSTRUCTION}\" from \"{TASK_NAME}\" is encoded by \"{MODEL_PATH}\" into shape {pred.shape} and saved to \"{save_path}\"') 55 | 56 | 57 | if __name__ == "__main__": 58 | main() 59 | -------------------------------------------------------------------------------- /scripts/encode_lang_batch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | import yaml 6 | from tqdm import tqdm 7 | 8 | from models.multimodal_encoder.t5_encoder import T5Embedder 9 | 10 | 11 | GPU = 0 12 | MODEL_PATH = "google/t5-v1_1-xxl" 13 | CONFIG_PATH = "configs/base.yaml" 14 | # Modify the TARGET_DIR to your dataset path 15 | TARGET_DIR = "data/datasets/agilex/tfrecords/" 16 | 17 | # Note: if your GPU VRAM is less than 24GB, 18 | # it is recommended to enable offloading by specifying an offload directory. 19 | OFFLOAD_DIR = None # Specify your offload directory here, ensuring the directory exists. 
20 | 21 | def main(): 22 | with open(CONFIG_PATH, "r") as fp: 23 | config = yaml.safe_load(fp) 24 | 25 | device = torch.device(f"cuda:{GPU}") 26 | text_embedder = T5Embedder( 27 | from_pretrained=MODEL_PATH, 28 | model_max_length=config["dataset"]["tokenizer_max_length"], 29 | device=device, 30 | use_offload_folder=OFFLOAD_DIR 31 | ) 32 | tokenizer, text_encoder = text_embedder.tokenizer, text_embedder.model 33 | 34 | # Get all the task paths 35 | task_paths = [] 36 | for sub_dir in os.listdir(TARGET_DIR): 37 | middle_dir = os.path.join(TARGET_DIR, sub_dir) 38 | if os.path.isdir(middle_dir): 39 | for task_dir in os.listdir(middle_dir): 40 | task_path = os.path.join(middle_dir, task_dir) 41 | if os.path.isdir(task_path): 42 | task_paths.append(task_path) 43 | 44 | # For each task, encode the instructions 45 | for task_path in tqdm(task_paths): 46 | # Load the instructions corresponding to the task from the directory 47 | with open(os.path.join(task_path, 'expanded_instruction_gpt-4-turbo.json'), 'r') as f_instr: 48 | instruction_dict = json.load(f_instr) 49 | instructions = [instruction_dict['instruction']] + instruction_dict['simplified_instruction'] + \ 50 | instruction_dict['expanded_instruction'] 51 | 52 | # Encode the instructions 53 | tokenized_res = tokenizer( 54 | instructions, return_tensors="pt", 55 | padding="longest", 56 | truncation=True 57 | ) 58 | tokens = tokenized_res["input_ids"].to(device) 59 | attn_mask = tokenized_res["attention_mask"].to(device) 60 | 61 | with torch.no_grad(): 62 | text_embeds = text_encoder( 63 | input_ids=tokens, 64 | attention_mask=attn_mask 65 | )["last_hidden_state"].detach().cpu() 66 | 67 | attn_mask = attn_mask.cpu().bool() 68 | 69 | # Save the embeddings for training use 70 | for i in range(len(instructions)): 71 | text_embed = text_embeds[i][attn_mask[i]] 72 | save_path = os.path.join(task_path, f"lang_embed_{i}.pt") 73 | torch.save(text_embed, save_path) 74 | 75 | if __name__ == "__main__": 76 | main() 77 | -------------------------------------------------------------------------------- /train/image_corrupt.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.simplefilter(action='ignore', category=FutureWarning) 3 | 4 | import numpy as np 5 | np.bool = np.bool_ 6 | import imgaug.augmenters as iaa 7 | from PIL import Image 8 | 9 | 10 | # Define our sequence of augmentation steps that will be applied to every image. 11 | seq = iaa.Sequential( 12 | [ 13 | # Execute one of the following noise augmentations 14 | iaa.OneOf([ 15 | iaa.AdditiveGaussianNoise( 16 | loc=0, scale=(0.0, 0.05*255), per_channel=0.5 17 | ), 18 | iaa.AdditiveLaplaceNoise(scale=(0.0, 0.05*255), per_channel=0.5), 19 | iaa.AdditivePoissonNoise(lam=(0.0, 0.05*255), per_channel=0.5) 20 | ]), 21 | 22 | # Execute one or none of the following blur augmentations 23 | iaa.SomeOf((0, 1), [ 24 | iaa.OneOf([ 25 | iaa.GaussianBlur((0, 3.0)), 26 | iaa.AverageBlur(k=(2, 7)), 27 | iaa.MedianBlur(k=(3, 11)), 28 | ]), 29 | iaa.MotionBlur(k=(3, 36)), 30 | ]), 31 | ], 32 | # do all of the above augmentations in random order 33 | random_order=True 34 | ) 35 | 36 | 37 | def image_corrupt(image: Image): 38 | image_arr = np.array(image) 39 | image_arr = image_arr[None, ...] 40 | 41 | image_arr = seq(images=image_arr) 42 | 43 | image = Image.fromarray(image_arr[0]) 44 | return image 45 | --------------------------------------------------------------------------------