├── .gitignore
├── LICENSE
├── README.md
├── assets
│   ├── head.png
│   └── iclr2025_poster.png
├── configs
│   ├── base.yaml
│   ├── calvin_rel_traj_location_bounds_task_ABC_D.json
│   ├── dataset_control_freq.json
│   ├── dataset_img_keys.json
│   ├── dataset_stat.json
│   ├── finetune_datasets.json
│   ├── finetune_sample_weights.json
│   ├── pretrain_datasets.json
│   ├── pretrain_sample_weights.json
│   ├── state_vec.py
│   └── zero2.json
├── data
│   ├── .gitignore
│   ├── agilex
│   │   └── hdf5totfrecords.py
│   ├── aloha
│   │   ├── hdf5totfrecords.py
│   │   └── unzip_data.sh
│   ├── bridgev2
│   │   ├── bridgedata_numpy_to_tfrecord.py
│   │   ├── bridgedata_raw_to_numpy.py
│   │   └── download.sh
│   ├── calvin
│   │   ├── download.sh
│   │   └── hdf5totfrecords.py
│   ├── compute_dataset_stat.py
│   ├── compute_dataset_stat_hdf5.py
│   ├── empty_lang_embed.pt
│   ├── episode_transform.py
│   ├── filelock.py
│   ├── hdf5_maniskill_dataset.py
│   ├── hdf5_vla_dataset.py
│   ├── openx_embod
│   │   └── download.sh
│   ├── preprocess.py
│   ├── preprocess_scripts
│   │   ├── __init__.py
│   │   ├── agilex.py
│   │   ├── aloha_mobile.py
│   │   ├── aloha_static.py
│   │   ├── asu_table_top_converted_externally_to_rlds.py
│   │   ├── austin_buds_dataset_converted_externally_to_rlds.py
│   │   ├── austin_sailor_dataset_converted_externally_to_rlds.py
│   │   ├── austin_sirius_dataset_converted_externally_to_rlds.py
│   │   ├── bc_z.py
│   │   ├── berkeley_autolab_ur5.py
│   │   ├── berkeley_cable_routing.py
│   │   ├── berkeley_fanuc_manipulation.py
│   │   ├── berkeley_gnm_cory_hall.py
│   │   ├── berkeley_gnm_recon.py
│   │   ├── berkeley_gnm_sac_son.py
│   │   ├── berkeley_mvp_converted_externally_to_rlds.py
│   │   ├── berkeley_rpt_converted_externally_to_rlds.py
│   │   ├── bridge.py
│   │   ├── bridgev2.py
│   │   ├── calvin.py
│   │   ├── cmu_franka_exploration_dataset_converted_externally_to_rlds.py
│   │   ├── cmu_play_fusion.py
│   │   ├── cmu_stretch.py
│   │   ├── columbia_cairlab_pusht_real.py
│   │   ├── dlr_edan_shared_control_converted_externally_to_rlds.py
│   │   ├── dlr_sara_grid_clamp_converted_externally_to_rlds.py
│   │   ├── dlr_sara_pour_converted_externally_to_rlds.py
│   │   ├── dobbe.py
│   │   ├── droid.py
│   │   ├── eth_agent_affordances.py
│   │   ├── fmb.py
│   │   ├── fractal20220817_data.py
│   │   ├── furniture_bench_dataset_converted_externally_to_rlds.py
│   │   ├── iamlab_cmu_pickup_insert_converted_externally_to_rlds.py
│   │   ├── imperialcollege_sawyer_wrist_cam.py
│   │   ├── jaco_play.py
│   │   ├── kaist_nonprehensile_converted_externally_to_rlds.py
│   │   ├── kuka.py
│   │   ├── language_table.py
│   │   ├── maniskill_dataset_converted_externally_to_rlds.py
│   │   ├── nyu_door_opening_surprising_effectiveness.py
│   │   ├── nyu_franka_play_dataset_converted_externally_to_rlds.py
│   │   ├── nyu_rot_dataset_converted_externally_to_rlds.py
│   │   ├── qut_dexterous_manpulation.py
│   │   ├── rh20t.py
│   │   ├── robo_net.py
│   │   ├── robomimic_can_ph.py
│   │   ├── robomimic_lift_ph.py
│   │   ├── robomimic_square_ph.py
│   │   ├── robomimic_tool_hang_ph.py
│   │   ├── robomimic_transport_ph.py
│   │   ├── roboset.py
│   │   ├── roboturk.py
│   │   ├── roboturk_real_laundrylayout.py
│   │   ├── roboturk_real_objectsearch.py
│   │   ├── roboturk_real_towercreation.py
│   │   ├── stanford_hydra_dataset_converted_externally_to_rlds.py
│   │   ├── stanford_kuka_multimodal_dataset_converted_externally_to_rlds.py
│   │   ├── stanford_mask_vit_converted_externally_to_rlds.py
│   │   ├── stanford_robocook_converted_externally_to_rlds.py
│   │   ├── taco_play.py
│   │   ├── tokyo_u_lsmo_converted_externally_to_rlds.py
│   │   ├── toto.py
│   │   ├── ucsd_kitchen_dataset_converted_externally_to_rlds.py
│   │   ├── ucsd_pick_and_place_dataset_converted_externally_to_rlds.py
│   │   ├── uiuc_d3field.py
│   │   ├── usc_cloth_sim_converted_externally_to_rlds.py
│   │   ├── utaustin_mutex.py
│   │   ├── utokyo_pr2_opening_fridge_converted_externally_to_rlds.py
│   │   ├── utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds.py
│   │   ├── utokyo_xarm_bimanual_converted_externally_to_rlds.py
│   │   ├── utokyo_xarm_pick_and_place_converted_externally_to_rlds.py
│   │   └── viola.py
│   ├── producer.py
│   ├── rh20t
│   │   └── hdf5totfrecords.py
│   ├── roboset
│   │   ├── download.py
│   │   ├── download.sh
│   │   ├── h5totfrecords.py
│   │   └── links.txt
│   ├── utils.py
│   └── vla_dataset.py
├── docs
│   ├── pretrain.md
│   └── test_6drot.py
├── eval_sim
│   ├── eval_dp.py
│   ├── eval_octo.py
│   ├── eval_openvla.py
│   └── eval_rdt_maniskill.py
├── finetune.sh
├── finetune_maniskill.sh
├── inference.sh
├── main.py
├── models
│   ├── ema_model.py
│   ├── hub_mixin.py
│   ├── multimodal_encoder
│   │   ├── clip_encoder.py
│   │   ├── dinov2_encoder.py
│   │   ├── siglip_encoder.py
│   │   └── t5_encoder.py
│   ├── rdt
│   │   ├── blocks.py
│   │   └── model.py
│   └── rdt_runner.py
├── pretrain.sh
├── requirements.txt
├── requirements_data.txt
├── scripts
│   ├── agilex_inference.py
│   ├── agilex_model.py
│   ├── encode_lang.py
│   ├── encode_lang_batch.py
│   └── maniskill_model.py
└── train
    ├── dataset.py
    ├── image_corrupt.py
    ├── sample.py
    └── train.py

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------

MIT License

Copyright (c) 2024 TSAIL group

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/assets/head.png:
--------------------------------------------------------------------------------

https://raw.githubusercontent.com/thu-ml/RoboticsDiffusionTransformer/9af5241cb4456836ddf852b5a0286441f7b5d1d6/assets/head.png

--------------------------------------------------------------------------------
/assets/iclr2025_poster.png:
--------------------------------------------------------------------------------

https://raw.githubusercontent.com/thu-ml/RoboticsDiffusionTransformer/9af5241cb4456836ddf852b5a0286441f7b5d1d6/assets/iclr2025_poster.png

--------------------------------------------------------------------------------
/configs/base.yaml:
--------------------------------------------------------------------------------

common:
  # The number of historical images
  img_history_size: 2
  # The number of future actions to predict
  action_chunk_size: 64
  # The number of cameras to be used in the model
  num_cameras: 3
  # Dimension for state/action; we use the same space for both state and action
  # This MUST be equal to the dimension defined in configs/state_vec.py
  state_dim: 128


dataset:
  # We extract the data from the raw dataset
  # and store it in the disk buffer via the producer.
  # When training, the consumer reads the data
  # randomly from the buffer.
  # The producer replaces data that has already been
  # read by the consumer with new data.

  # The path to the buffer (at least 400GB)
  buf_path: /path/to/buffer
  # The number of chunks in the buffer
  buf_num_chunks: 512
  # The number of samples (steps rather than episodes) in each chunk
  buf_chunk_size: 512

  # We filter out episodes with length less than `epsd_len_thresh_low`
  epsd_len_thresh_low: 32
  # For episodes longer than `epsd_len_thresh_high`,
  # we randomly sample `epsd_len_thresh_high` steps each time we load the episode
  # to better balance the training datasets
  epsd_len_thresh_high: 2048
  # How to fit the image size
  image_aspect_ratio: pad
  # Maximum number of language tokens
  tokenizer_max_length: 1024

model:
  # Config for condition adaptors
  lang_adaptor: mlp2x_gelu
  img_adaptor: mlp2x_gelu
  state_adaptor: mlp3x_gelu
  lang_token_dim: 4096
  img_token_dim: 1152
  # Dim of action or proprioception vector
  # A `state` refers to an action or a proprioception vector
  state_token_dim: 128
  # Config for RDT structure
  rdt:
    # 1B: num_head 32 hidden_size 2048
    hidden_size: 2048
    depth: 28
    num_heads: 32
    cond_pos_embed_type: multimodal
  # For noise scheduler
  noise_scheduler:
    type: ddpm
    num_train_timesteps: 1000
    num_inference_timesteps: 5
    beta_schedule: squaredcos_cap_v2 # Critical choice
    prediction_type: sample
    clip_sample: False
  # For EMA (params averaging)
  # We do not use EMA currently
  ema:
    update_after_step: 0
    inv_gamma: 1.0
    power: 0.75
    min_value: 0.0
    max_value: 0.9999

--------------------------------------------------------------------------------
/configs/calvin_rel_traj_location_bounds_task_ABC_D.json:
--------------------------------------------------------------------------------

{
    "A": [
        [
            -0.2691913843154907,
            -0.21995729207992554,
            -0.182277649641037
        ],
        [
            0.35127854347229004,
            0.2769763469696045,
            0.17159393429756165
        ]
    ],
    "B": [
        [
            -0.2576896846294403,
            -0.22244493663311005,
            -0.20557966828346252
        ],
        [
            0.32854634523391724,
            0.2922680974006653,
            0.17373555898666382
        ]
    ],
    "C": [
        [
            -0.29205888509750366,
            -0.24688798189163208,
            -0.17577645182609558
        ],
        [
            0.25053921341896057,
            0.3277084231376648,
            0.16431939601898193
        ]
    ],
    "D": [
        [
            -0.25131964683532715,
            -0.15233077108860016,
            -0.13294968008995056
        ],
        [
            0.19209328293800354,
            0.19344553351402283,
            0.1370421051979065
        ]
    ]
}

--------------------------------------------------------------------------------
/configs/dataset_control_freq.json:
--------------------------------------------------------------------------------

{
    "fractal20220817_data": 3,
    "taco_play": 15,
    "jaco_play": 10,
    "berkeley_cable_routing": 10,
    "nyu_door_opening_surprising_effectiveness": 3,
    "viola": 20,
    "berkeley_autolab_ur5": 5,
    "toto": 30,
    "kuka": 10,
    "language_table": 10,
    "columbia_cairlab_pusht_real": 10,
    "stanford_kuka_multimodal_dataset_converted_externally_to_rlds": 20,
    "nyu_rot_dataset_converted_externally_to_rlds": 3,
    "stanford_hydra_dataset_converted_externally_to_rlds": 10,
    "austin_buds_dataset_converted_externally_to_rlds": 20,
    "nyu_franka_play_dataset_converted_externally_to_rlds": 3,
    "maniskill_dataset_converted_externally_to_rlds": 20,
    "furniture_bench_dataset_converted_externally_to_rlds": 10,
    "ucsd_kitchen_dataset_converted_externally_to_rlds": 2,
    "ucsd_pick_and_place_dataset_converted_externally_to_rlds": 3,
    "austin_sailor_dataset_converted_externally_to_rlds": 20,
    "austin_sirius_dataset_converted_externally_to_rlds": 20,
    "bc_z": 10,
    "utokyo_pr2_opening_fridge_converted_externally_to_rlds": 10,
    "utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds": 10,
    "utokyo_xarm_pick_and_place_converted_externally_to_rlds": 10,
    "utokyo_xarm_bimanual_converted_externally_to_rlds": 10,
    "berkeley_mvp_converted_externally_to_rlds": 5,
    "berkeley_rpt_converted_externally_to_rlds": 30,
    "kaist_nonprehensile_converted_externally_to_rlds": 10,
    "stanford_mask_vit_converted_externally_to_rlds": 0,
    "tokyo_u_lsmo_converted_externally_to_rlds": 10,
    "dlr_sara_pour_converted_externally_to_rlds": 10,
    "dlr_sara_grid_clamp_converted_externally_to_rlds": 10,
    "dlr_edan_shared_control_converted_externally_to_rlds": 5,
    "asu_table_top_converted_externally_to_rlds": 12.5,
    "stanford_robocook_converted_externally_to_rlds": 5,
    "eth_agent_affordances": 66.6,
    "imperialcollege_sawyer_wrist_cam": 10,
    "iamlab_cmu_pickup_insert_converted_externally_to_rlds": 20,
    "uiuc_d3field": 1,
    "utaustin_mutex": 20,
    "berkeley_fanuc_manipulation": 10,
    "cmu_play_fusion": 5,
    "cmu_stretch": 10,
    "berkeley_gnm_recon": 3,
    "berkeley_gnm_cory_hall": 5,
    "berkeley_gnm_sac_son": 10,
    "robo_net": 1,
    "roboturk_real_towercreation": 10,
    "roboturk_real_laundrylayout": 10,
    "roboturk_real_objectsearch": 10,
    "aloha_mobile": 50,
    "aloha_static": 50,
    "roboset": 5,
    "droid": 15,
    "fmb": 10,
    "dobbe": 30,
    "qut_dexterous_manpulation": 30,
    "agilex": 25,
    "rh20t": 10,
    "calvin": 30,
    "bridgev2": 5
}
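The `noise_scheduler` block in configs/base.yaml above maps naturally onto a diffusion scheduler. Below is a minimal sketch, assuming the `diffusers` library is used to realize the `type: ddpm` choice; the actual wiring lives in models/rdt_runner.py and may differ in detail.

# Sketch: building a DDPM scheduler from the noise_scheduler block in
# configs/base.yaml. Field names follow the YAML; the repo's own construction
# in models/rdt_runner.py may differ.
import yaml
from diffusers import DDPMScheduler

with open("configs/base.yaml", "r") as f:
    config = yaml.safe_load(f)

ns = config["model"]["noise_scheduler"]
scheduler = DDPMScheduler(
    num_train_timesteps=ns["num_train_timesteps"],  # 1000
    beta_schedule=ns["beta_schedule"],              # squaredcos_cap_v2
    prediction_type=ns["prediction_type"],          # "sample": predict x_0 directly
    clip_sample=ns["clip_sample"],                  # False
)
# At inference time, only num_inference_timesteps (5) denoising steps are run:
scheduler.set_timesteps(ns["num_inference_timesteps"])

Predicting the clean sample (`prediction_type: sample`) rather than the noise is what makes such a small number of inference steps workable.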
--------------------------------------------------------------------------------
/configs/finetune_datasets.json:
--------------------------------------------------------------------------------

[
    "agilex"
]

--------------------------------------------------------------------------------
/configs/finetune_sample_weights.json:
--------------------------------------------------------------------------------

{
    "agilex": 100
}

--------------------------------------------------------------------------------
/configs/pretrain_datasets.json:
--------------------------------------------------------------------------------

[
    "fractal20220817_data",
    "jaco_play",
    "taco_play",
    "berkeley_cable_routing",
    "viola",
    "berkeley_autolab_ur5",
    "toto",
    "nyu_door_opening_surprising_effectiveness",
    "columbia_cairlab_pusht_real",
    "stanford_kuka_multimodal_dataset_converted_externally_to_rlds",
    "austin_buds_dataset_converted_externally_to_rlds",
    "kuka",
    "utokyo_xarm_bimanual_converted_externally_to_rlds",
    "stanford_hydra_dataset_converted_externally_to_rlds",
    "maniskill_dataset_converted_externally_to_rlds",
    "ucsd_kitchen_dataset_converted_externally_to_rlds",
    "ucsd_pick_and_place_dataset_converted_externally_to_rlds",
    "austin_sailor_dataset_converted_externally_to_rlds",
    "austin_sirius_dataset_converted_externally_to_rlds",
    "bc_z",
    "utokyo_pr2_opening_fridge_converted_externally_to_rlds",
    "utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds",
    "utokyo_xarm_pick_and_place_converted_externally_to_rlds",
    "berkeley_mvp_converted_externally_to_rlds",
    "berkeley_rpt_converted_externally_to_rlds",
    "kaist_nonprehensile_converted_externally_to_rlds",
    "tokyo_u_lsmo_converted_externally_to_rlds",
    "dlr_sara_grid_clamp_converted_externally_to_rlds",
    "stanford_robocook_converted_externally_to_rlds",
    "imperialcollege_sawyer_wrist_cam",
    "iamlab_cmu_pickup_insert_converted_externally_to_rlds",
    "utaustin_mutex",
    "berkeley_fanuc_manipulation",
    "cmu_play_fusion",
    "language_table",
    "furniture_bench_dataset_converted_externally_to_rlds",
    "droid",
    "fmb",
    "dobbe",
    "qut_dexterous_manpulation",
    "aloha_mobile",
    "aloha_static",
    "roboset",
    "rh20t",
    "calvin",
    "bridgev2"
]

--------------------------------------------------------------------------------
/configs/pretrain_sample_weights.json:
--------------------------------------------------------------------------------

{
    "fractal20220817_data": 271,
    "taco_play": 60,
    "jaco_play": 33,
    "berkeley_cable_routing": 8,
    "nyu_door_opening_surprising_effectiveness": 10,
    "viola": 12,
    "berkeley_autolab_ur5": 32,
    "toto": 32,
    "kuka": 50,
    "language_table": 100,
    "columbia_cairlab_pusht_real": 12,
    "stanford_kuka_multimodal_dataset_converted_externally_to_rlds": 55,
    "stanford_hydra_dataset_converted_externally_to_rlds": 24,
    "austin_buds_dataset_converted_externally_to_rlds": 7,
    "maniskill_dataset_converted_externally_to_rlds": 174,
    "furniture_bench_dataset_converted_externally_to_rlds": 71,
    "ucsd_kitchen_dataset_converted_externally_to_rlds": 12,
    "ucsd_pick_and_place_dataset_converted_externally_to_rlds": 37,
    "austin_sailor_dataset_converted_externally_to_rlds": 15,
    "austin_sirius_dataset_converted_externally_to_rlds": 24,
    "bc_z": 208,
    "utokyo_pr2_opening_fridge_converted_externally_to_rlds": 9,
    "utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds": 15,
    "utokyo_xarm_pick_and_place_converted_externally_to_rlds": 10,
    "utokyo_xarm_bimanual_converted_externally_to_rlds": 1,
    "berkeley_mvp_converted_externally_to_rlds": 22,
    "berkeley_rpt_converted_externally_to_rlds": 30,
    "kaist_nonprehensile_converted_externally_to_rlds": 14,
    "tokyo_u_lsmo_converted_externally_to_rlds": 7,
    "dlr_sara_grid_clamp_converted_externally_to_rlds": 1,
    "stanford_robocook_converted_externally_to_rlds": 50,
    "imperialcollege_sawyer_wrist_cam": 13,
    "iamlab_cmu_pickup_insert_converted_externally_to_rlds": 25,
    "utaustin_mutex": 39,
    "berkeley_fanuc_manipulation": 20,
    "cmu_play_fusion": 24,
    "droid": 303,
    "fmb": 42,
    "dobbe": 36,
    "qut_dexterous_manpulation": 14,
    "aloha_mobile": 150,
    "aloha_static": 150,
    "roboset": 135,
    "rh20t": 331,
    "calvin": 100,
    "bridgev2": 224
}
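A larger weight means a dataset is drawn proportionally more often during pre-training. A minimal sketch of how these two config files could drive dataset selection; the repo's actual producer/consumer sampling lives in data/producer.py and data/vla_dataset.py and may differ:

# Sketch: weighted dataset selection from the two JSON configs above.
import json
import random

with open("configs/pretrain_datasets.json") as f:
    names = json.load(f)
with open("configs/pretrain_sample_weights.json") as f:
    weights = json.load(f)

# Draw one dataset name, proportional to its weight.
dataset_name = random.choices(
    names,
    weights=[weights[n] for n in names],
    k=1,
)[0]
print(dataset_name)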
"utokyo_pr2_opening_fridge_converted_externally_to_rlds": 9, 24 | "utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds": 15, 25 | "utokyo_xarm_pick_and_place_converted_externally_to_rlds": 10, 26 | "utokyo_xarm_bimanual_converted_externally_to_rlds": 1, 27 | "berkeley_mvp_converted_externally_to_rlds": 22, 28 | "berkeley_rpt_converted_externally_to_rlds": 30, 29 | "kaist_nonprehensile_converted_externally_to_rlds": 14, 30 | "tokyo_u_lsmo_converted_externally_to_rlds": 7, 31 | "dlr_sara_grid_clamp_converted_externally_to_rlds": 1, 32 | "stanford_robocook_converted_externally_to_rlds": 50, 33 | "imperialcollege_sawyer_wrist_cam": 13, 34 | "iamlab_cmu_pickup_insert_converted_externally_to_rlds": 25, 35 | "utaustin_mutex": 39, 36 | "berkeley_fanuc_manipulation": 20, 37 | "cmu_play_fusion": 24, 38 | "droid": 303, 39 | "fmb": 42, 40 | "dobbe": 36, 41 | "qut_dexterous_manpulation": 14, 42 | "aloha_mobile": 150, 43 | "aloha_static": 150, 44 | "roboset": 135, 45 | "rh20t": 331, 46 | "calvin": 100, 47 | "bridgev2": 224 48 | } -------------------------------------------------------------------------------- /configs/zero2.json: -------------------------------------------------------------------------------- 1 | { 2 | "bf16": { 3 | "enabled": "auto" 4 | }, 5 | "train_micro_batch_size_per_gpu": "auto", 6 | "train_batch_size": "auto", 7 | "gradient_accumulation_steps": "auto", 8 | "zero_optimization": { 9 | "stage": 2, 10 | "overlap_comm": true, 11 | "contiguous_gradients": true, 12 | "sub_group_size": 1e9 13 | } 14 | } -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore data files 2 | datasets 3 | -------------------------------------------------------------------------------- /data/aloha/unzip_data.sh: -------------------------------------------------------------------------------- 1 | cd ../datasets/aloha/ 2 | 3 | unzip aloha_mobile.zip -------------------------------------------------------------------------------- /data/bridgev2/download.sh: -------------------------------------------------------------------------------- 1 | # Download the dataset to ../datasets/bridgev2 2 | mkdir -p ../datasets/bridgev2 3 | wget -O ../datasets/bridgev2/demos_8_17.zip https://rail.eecs.berkeley.edu/datasets/bridge_release/data/demos_8_17.zip 4 | mkdir -p ../datasets/bridgev2/raw 5 | # Unzip the dataset 6 | unzip '../datasets/bridgev2/*.zip' -d ../datasets/bridgev2/raw 7 | # Convert the dataset to numpy 8 | python bridgedata_raw_to_numpy.py --input ../datasets/bridgev2/raw --output ../datasets/bridgev2/npy 9 | # Convert the dataset to tfrecords 10 | python bridgedata_numpy_to_tfrecord.py --input ../datasets/bridgev2/npy --output ../datasets/bridgev2/tfrecords 11 | # Remove the raw data and numpy data 12 | rm -rf ../datasets/bridgev2/raw 13 | rm -rf ../datasets/bridgev2/npy -------------------------------------------------------------------------------- /data/calvin/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Downloading CALVIN dataset..." 
--------------------------------------------------------------------------------
/data/.gitignore:
--------------------------------------------------------------------------------

# Ignore data files
datasets

--------------------------------------------------------------------------------
/data/aloha/unzip_data.sh:
--------------------------------------------------------------------------------

cd ../datasets/aloha/

unzip aloha_mobile.zip

--------------------------------------------------------------------------------
/data/bridgev2/download.sh:
--------------------------------------------------------------------------------

# Download the dataset to ../datasets/bridgev2
mkdir -p ../datasets/bridgev2
wget -O ../datasets/bridgev2/demos_8_17.zip https://rail.eecs.berkeley.edu/datasets/bridge_release/data/demos_8_17.zip
mkdir -p ../datasets/bridgev2/raw
# Unzip the dataset
unzip '../datasets/bridgev2/*.zip' -d ../datasets/bridgev2/raw
# Convert the dataset to numpy
python bridgedata_raw_to_numpy.py --input ../datasets/bridgev2/raw --output ../datasets/bridgev2/npy
# Convert the dataset to tfrecords
python bridgedata_numpy_to_tfrecord.py --input ../datasets/bridgev2/npy --output ../datasets/bridgev2/tfrecords
# Remove the raw data and numpy data
rm -rf ../datasets/bridgev2/raw
rm -rf ../datasets/bridgev2/npy

--------------------------------------------------------------------------------
/data/calvin/download.sh:
--------------------------------------------------------------------------------

#!/bin/bash

echo "Downloading CALVIN dataset..."

# Create calvin folder in ../datasets/calvin/
mkdir -p ../datasets/calvin/

cd ../datasets/calvin/

# You can use this for faster downloading
# aria2c -x 16 -s 16 http://calvin.cs.uni-freiburg.de/dataset/task_ABC_D.zip

wget http://calvin.cs.uni-freiburg.de/dataset/task_ABC_D.zip

echo "Unzipping CALVIN dataset..."

unzip task_ABC_D.zip

echo "Done downloading and unzipping CALVIN dataset."

--------------------------------------------------------------------------------
/data/calvin/hdf5totfrecords.py:
--------------------------------------------------------------------------------

import tensorflow as tf
import os
import numpy as np
from tqdm import tqdm


def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy()  # BytesList won't unpack a string from an EagerTensor.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def _bool_feature(value):
    """Returns a bool_list from a boolean."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[int(value)]))


def serialize_example(action, robot_obs, rgb_static, rgb_gripper, instruction, terminate_episode):
    # Feature for fixed-length fields
    feature = {
        'action': _bytes_feature(tf.io.serialize_tensor(action)),
        'robot_obs': _bytes_feature(tf.io.serialize_tensor(robot_obs)),
        'rgb_static': _bytes_feature(tf.io.serialize_tensor(rgb_static)),
        'rgb_gripper': _bytes_feature(tf.io.serialize_tensor(rgb_gripper)),
        'terminate_episode': _bool_feature(terminate_episode),
        'instruction': _bytes_feature(instruction),
    }

    example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
    return example_proto.SerializeToString()


def write_tfrecords(root_dir, out_dir):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Get the language annotations and their corresponding episode indices
    f = np.load(os.path.join(root_dir, "lang_annotations/auto_lang_ann.npy"), allow_pickle=True)
    lang = f.item()['language']['ann']
    lang = np.array([x.encode('utf-8') for x in lang])
    lang_start_end_idx = f.item()['info']['indx']
    num_ep = len(lang_start_end_idx)

    with tqdm(total=num_ep) as pbar:
        for episode_idx, (start_idx, end_idx) in enumerate(lang_start_end_idx):
            pbar.update(1)

            step_files = [
                f"episode_{str(i).zfill(7)}.npz"
                for i in range(start_idx, end_idx + 1)
            ]
            action = []
            robot_obs = []
            rgb_static = []
            rgb_gripper = []
            instr = lang[episode_idx]
            for step_file in step_files:
                filepath = os.path.join(root_dir, step_file)
                f = np.load(filepath)
                # Get the relevant fields
                action.append(f['actions'])
                robot_obs.append(f['robot_obs'])
                rgb_static.append(f['rgb_static'])
                rgb_gripper.append(f['rgb_gripper'])

            tfrecord_path = os.path.join(out_dir, f'{episode_idx:07d}.tfrecord')
            print(f"Writing TFRecords to {tfrecord_path}")
            with tf.io.TFRecordWriter(tfrecord_path) as writer:
                for i in range(len(step_files)):
                    serialized_example = serialize_example(
                        action[i], robot_obs[i], rgb_static[i], rgb_gripper[i], instr, i == len(step_files) - 1
                    )
                    writer.write(serialized_example)

output_dirs = [
    '../datasets/calvin/tfrecords/training',
    '../datasets/calvin/tfrecords/validation'
]

for output_dir in output_dirs:
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

root_dirs = [
    '../datasets/calvin/task_ABC_D/training',
    '../datasets/calvin/task_ABC_D/validation'
]

for root_dir, output_dir in zip(root_dirs, output_dirs):
    print(f"Writing TFRecords to {output_dir}")
    write_tfrecords(root_dir, output_dir)
--------------------------------------------------------------------------------
/data/compute_dataset_stat_hdf5.py:
--------------------------------------------------------------------------------

"""
This file will compute the min, max, mean, and standard deviation of each dataset
in `pretrain_datasets.json` or `finetune_datasets.json`.
"""

import json
import argparse

import numpy as np
from tqdm import tqdm

from data.hdf5_vla_dataset import HDF5VLADataset


def process_hdf5_dataset(vla_dataset):
    EPS = 1e-8
    episode_cnt = 0
    state_sum = 0
    state_sum_sq = 0
    z_state_sum = 0
    z_state_sum_sq = 0
    state_cnt = 0
    nz_state_cnt = None
    state_max = None
    state_min = None
    for i in tqdm(range(len(vla_dataset))):
        episode = vla_dataset.get_item(i, state_only=True)
        episode_cnt += 1

        states = episode['state']

        # Zero the values that are close to zero
        z_states = states.copy()
        z_states[np.abs(states) <= EPS] = 0
        # Compute the non-zero count
        if nz_state_cnt is None:
            nz_state_cnt = np.zeros(states.shape[1])
        nz_state_cnt += np.sum(np.abs(states) > EPS, axis=0)

        # Update statistics
        state_sum += np.sum(states, axis=0)
        state_sum_sq += np.sum(states**2, axis=0)
        z_state_sum += np.sum(z_states, axis=0)
        z_state_sum_sq += np.sum(z_states**2, axis=0)
        state_cnt += states.shape[0]
        if state_max is None:
            state_max = np.max(states, axis=0)
            state_min = np.min(states, axis=0)
        else:
            state_max = np.maximum(state_max, np.max(states, axis=0))
            state_min = np.minimum(state_min, np.min(states, axis=0))

    # Add one to avoid division by zero
    nz_state_cnt = np.maximum(nz_state_cnt, np.ones_like(nz_state_cnt))

    result = {
        "dataset_name": vla_dataset.get_dataset_name(),
        "state_mean": (state_sum / state_cnt).tolist(),
        "state_std": np.sqrt(
            np.maximum(
                (z_state_sum_sq / nz_state_cnt) - (z_state_sum / state_cnt)**2 * (state_cnt / nz_state_cnt),
                np.zeros_like(state_sum_sq)
            )
        ).tolist(),
        "state_min": state_min.tolist(),
        "state_max": state_max.tolist(),
    }

    return result


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--save_path', type=str,
                        default="configs/dataset_stat.json",
                        help="JSON file path to save the dataset statistics.")
    parser.add_argument('--skip_exist', action='store_true',
                        help="Whether to skip the existing dataset statistics.")
    args = parser.parse_args()

    vla_dataset = HDF5VLADataset()
    dataset_name = vla_dataset.get_dataset_name()

    try:
        with open(args.save_path, 'r') as f:
            results = json.load(f)
    except FileNotFoundError:
        results = {}
    if args.skip_exist and dataset_name in results:
        print(f"Skipping existing {dataset_name} dataset statistics")
    else:
        print(f"Processing {dataset_name} dataset")
        result = process_hdf5_dataset(vla_dataset)
        results[result["dataset_name"]] = result
        with open(args.save_path, 'w') as f:
            json.dump(results, f, indent=4)
    print("All datasets have been processed.")
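Note on the std computation above: states are zero-padded into the unified 128-dim vector space (see `state_dim` in configs/base.yaml), so the standard deviation is taken only over non-zero entries; otherwise the padding zeros would bias every per-dimension std toward zero. A toy numpy check of the same formula:

# Sketch: verifying the nonzero-count variance formula used above.
import numpy as np

EPS = 1e-8
states = np.array([[0.0, 1.0],
                   [0.0, 3.0]])                   # dim 0 is pure padding
cnt = states.shape[0]
nz = np.maximum((np.abs(states) > EPS).sum(axis=0), 1)
mean = states.sum(axis=0) / cnt
var = (states ** 2).sum(axis=0) / nz - mean ** 2 * (cnt / nz)
print(np.sqrt(np.maximum(var, 0)))                # [0. 1.] -- padding dim stays 0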
--------------------------------------------------------------------------------
/data/empty_lang_embed.pt:
--------------------------------------------------------------------------------

https://raw.githubusercontent.com/thu-ml/RoboticsDiffusionTransformer/9af5241cb4456836ddf852b5a0286441f7b5d1d6/data/empty_lang_embed.pt

--------------------------------------------------------------------------------
/data/filelock.py:
--------------------------------------------------------------------------------

import fcntl


class FileLock:
    """
    A file lock class.
    """
    def __init__(self, filename):
        self.filename = filename
        self.handle = None

    def acquire_read_lock(self):
        self.handle = open(self.filename + '.lock', 'r')
        fcntl.flock(self.handle, fcntl.LOCK_SH | fcntl.LOCK_NB)

    def acquire_write_lock(self):
        self.handle = open(self.filename + '.lock', 'w')
        fcntl.flock(self.handle, fcntl.LOCK_EX | fcntl.LOCK_NB)

    def release_lock(self):
        if self.handle is not None:
            fcntl.flock(self.handle, fcntl.LOCK_UN)
            self.handle.close()
            self.handle = None
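Both acquire methods use `LOCK_NB`, so `flock` raises `BlockingIOError` when the lock is already held rather than blocking. A sketch of typical non-blocking use; the buffer-chunk path is a hypothetical example:

# Sketch: non-blocking use of FileLock (creates/opens <path>.lock).
from data.filelock import FileLock

lock = FileLock("/path/to/buffer/chunk_0")   # hypothetical chunk path
try:
    lock.acquire_write_lock()                # raises BlockingIOError if held
    # ... exclusively rewrite the chunk ...
except BlockingIOError:
    pass                                     # lock held elsewhere: skip or retry
finally:
    lock.release_lock()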
--------------------------------------------------------------------------------
/data/openx_embod/download.sh:
--------------------------------------------------------------------------------

#!/bin/bash

# Define the list of dataset names
DATASETS=(
    'fractal20220817_data'
    'kuka'
    'bridge'
    'taco_play'
    'jaco_play'
    'berkeley_cable_routing'
    'roboturk'
    'nyu_door_opening_surprising_effectiveness'
    'viola'
    'berkeley_autolab_ur5'
    'toto'
    'language_table'
    'columbia_cairlab_pusht_real'
    'stanford_kuka_multimodal_dataset_converted_externally_to_rlds'
    'nyu_rot_dataset_converted_externally_to_rlds'
    'stanford_hydra_dataset_converted_externally_to_rlds'
    'austin_buds_dataset_converted_externally_to_rlds'
    'nyu_franka_play_dataset_converted_externally_to_rlds'
    'maniskill_dataset_converted_externally_to_rlds'
    'furniture_bench_dataset_converted_externally_to_rlds'
    'cmu_franka_exploration_dataset_converted_externally_to_rlds'
    'ucsd_kitchen_dataset_converted_externally_to_rlds'
    'ucsd_pick_and_place_dataset_converted_externally_to_rlds'
    'austin_sailor_dataset_converted_externally_to_rlds'
    'austin_sirius_dataset_converted_externally_to_rlds'
    'bc_z'
    'usc_cloth_sim_converted_externally_to_rlds'
    'utokyo_pr2_opening_fridge_converted_externally_to_rlds'
    'utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds'
    'utokyo_saytap_converted_externally_to_rlds'
    'utokyo_xarm_pick_and_place_converted_externally_to_rlds'
    'utokyo_xarm_bimanual_converted_externally_to_rlds'
    'robo_net'
    'berkeley_mvp_converted_externally_to_rlds'
    'berkeley_rpt_converted_externally_to_rlds'
    'kaist_nonprehensile_converted_externally_to_rlds'
    'stanford_mask_vit_converted_externally_to_rlds'
    'tokyo_u_lsmo_converted_externally_to_rlds'
    'dlr_sara_pour_converted_externally_to_rlds'
    'dlr_sara_grid_clamp_converted_externally_to_rlds'
    'dlr_edan_shared_control_converted_externally_to_rlds'
    'asu_table_top_converted_externally_to_rlds'
    'stanford_robocook_converted_externally_to_rlds'
    'eth_agent_affordances'
    'imperialcollege_sawyer_wrist_cam'
    'iamlab_cmu_pickup_insert_converted_externally_to_rlds'
    'uiuc_d3field'
    'utaustin_mutex'
    'berkeley_fanuc_manipulation'
    'cmu_playing_with_food'
    'cmu_play_fusion'
    'cmu_stretch'
    'berkeley_gnm_recon'
    'berkeley_gnm_cory_hall'
    'berkeley_gnm_sac_son'
    # Additional datasets
    'droid'
    'fmb'
    'dobbe'
)


# Iterate over each dataset name
for dataset_name in "${DATASETS[@]}"; do
    echo "Downloading $dataset_name"

    # Execute gsutil command
    ~/miniconda3/envs/rdt-data/bin/gsutil -m cp -n -r -D "gs://gresearch/robotics/$dataset_name" ../datasets/openx_embod/

    # Check if the resulting directory exists
    directory_path="../datasets/openx_embod/$dataset_name"
    if [ ! -d "$directory_path" ]; then
        # If the directory does not exist, then print an error message
        echo "Failed to download $dataset_name"
    else
        # If the directory exists, then print a success message
        echo "Successfully downloaded $dataset_name"
    fi
done
--------------------------------------------------------------------------------
/data/preprocess_scripts/__init__.py:
--------------------------------------------------------------------------------

from . import fractal20220817_data
from . import bridge
from . import jaco_play
from . import nyu_door_opening_surprising_effectiveness
from . import taco_play
from . import berkeley_cable_routing
from . import roboturk
from . import viola
from . import berkeley_autolab_ur5
from . import toto
from . import columbia_cairlab_pusht_real
from . import stanford_kuka_multimodal_dataset_converted_externally_to_rlds
from . import nyu_rot_dataset_converted_externally_to_rlds
from . import austin_buds_dataset_converted_externally_to_rlds
from . import nyu_franka_play_dataset_converted_externally_to_rlds
from . import cmu_franka_exploration_dataset_converted_externally_to_rlds
from . import kuka
from . import utokyo_xarm_bimanual_converted_externally_to_rlds
from . import maniskill_dataset_converted_externally_to_rlds
from . import stanford_hydra_dataset_converted_externally_to_rlds
from . import ucsd_kitchen_dataset_converted_externally_to_rlds
from . import ucsd_pick_and_place_dataset_converted_externally_to_rlds
from . import austin_sailor_dataset_converted_externally_to_rlds
from . import austin_sirius_dataset_converted_externally_to_rlds
from . import bc_z
from . import usc_cloth_sim_converted_externally_to_rlds
from . import utokyo_pr2_opening_fridge_converted_externally_to_rlds
from . import utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds
from . import utokyo_xarm_pick_and_place_converted_externally_to_rlds
from . import berkeley_mvp_converted_externally_to_rlds
from . import berkeley_rpt_converted_externally_to_rlds
from . import kaist_nonprehensile_converted_externally_to_rlds
from . import stanford_mask_vit_converted_externally_to_rlds
from . import tokyo_u_lsmo_converted_externally_to_rlds
from . import dlr_sara_pour_converted_externally_to_rlds
from . import dlr_sara_grid_clamp_converted_externally_to_rlds
from . import dlr_edan_shared_control_converted_externally_to_rlds
from . import asu_table_top_converted_externally_to_rlds
from . import stanford_robocook_converted_externally_to_rlds
from . import roboturk_real_laundrylayout
from . import roboturk_real_towercreation
from . import roboturk_real_objectsearch
from . import robomimic_lift_ph
from . import robomimic_can_ph
from . import robomimic_square_ph
from . import robomimic_transport_ph
from . import robomimic_tool_hang_ph
from . import eth_agent_affordances
from . import imperialcollege_sawyer_wrist_cam
from . import iamlab_cmu_pickup_insert_converted_externally_to_rlds
from . import uiuc_d3field
from . import utaustin_mutex
from . import berkeley_fanuc_manipulation
from . import cmu_play_fusion
from . import cmu_stretch
from . import berkeley_gnm_recon
from . import berkeley_gnm_cory_hall
from . import berkeley_gnm_sac_son
from . import language_table
from . import furniture_bench_dataset_converted_externally_to_rlds
from . import robo_net
from . import bridgev2
from . import aloha_mobile
from . import aloha_static
from . import droid
from . import fmb
from . import dobbe
from . import qut_dexterous_manpulation
from . import roboset
from . import agilex
from . import rh20t
from . import calvin
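Each submodule exposes a `process_step` with the same signature, so the package doubles as a registry keyed by dataset name. A sketch of how data/preprocess.py presumably dispatches to these modules; the actual lookup in the repo may differ:

# Sketch: name-based dispatch over the preprocess_scripts package.
from data import preprocess_scripts

def get_process_step(dataset_name: str):
    """Return the process_step function for a dataset, e.g. 'bc_z'."""
    module = getattr(preprocess_scripts, dataset_name)
    return module.process_step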
--------------------------------------------------------------------------------
/data/preprocess_scripts/asu_table_top_converted_externally_to_rlds.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, quaternion_to_euler


def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
    """
    Convert terminate action to a boolean, where True means terminate.
    """
    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    action = step['action']
    # Robot action, consists of [7x joint velocities, 2x gripper velocities, 1x terminate episode].
    # NOTE: the actual dimension of the action is 7, so only the 7x joint velocities exist
    joint_vel = action[:7]
    gripper_vel = action[7:9]
    # There is extra action_delta information:
    # Robot delta action, consists of [7x joint velocities, 2x gripper velocities, 1x terminate episode].
    # action_delta = step['action_delta']

    # Concatenate the action
    step['action'] = {}
    action = step['action']
    action['arm_concat'] = tf.concat([joint_vel, gripper_vel], axis=0)
    action['terminate'] = step['is_terminal']

    # Write the action format
    action['format'] = tf.constant(
        "arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_vel,gripper_joint_1_vel")

    # Convert raw state to our state
    state = step['observation']
    state_vec = state['state']
    # Robot state, consists of [6x robot joint angles, 1x gripper position].
    arm_joint_ang = state_vec[:6]
    grip_pos = state_vec[6:7]
    # Robot joint velocity, consists of [6x robot joint angles, 1x gripper position].
    state_vel = state['state_vel']
    arm_joint_vel = state_vel[:6]
    grip_vel = state_vel[6:7]

    # Concatenate the state
    state['arm_concat'] = tf.concat([arm_joint_ang, arm_joint_vel, grip_pos, grip_vel], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,gripper_joint_0_pos,gripper_joint_0_vel")

    # Clean the task instruction
    # Define the replacements (old, new) as a dictionary
    replacements = {
        '_': ' ',
        '1f': ' ',
        '4f': ' ',
        '-': ' ',
        '50': ' ',
        '55': ' ',
        '56': ' ',
    }
    instr = step['language_instruction']
    instr = clean_task_instruction(instr, replacements)
    step['observation']['natural_language_instruction'] = instr

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod'
    DATASET_NAME = 'asu_table_top_converted_externally_to_rlds'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
--------------------------------------------------------------------------------
/data/preprocess_scripts/austin_buds_dataset_converted_externally_to_rlds.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, euler_to_quaternion, rotation_matrix_to_ortho6d


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    origin_action = step['action']
    step['action'] = {}
    action = step['action']
    action['terminate'] = step['is_terminal']

    eef_delta_pos = origin_action[:3]
    eef_ang = origin_action[3:6]
    eef_ang = euler_to_quaternion(eef_ang)
    # gripper_open: -1 = open, 1 = closed
    grip_open = tf.where(tf.equal(origin_action[6:], tf.constant(-1.0)), tf.constant(1.0), tf.constant(0.0))

    # No base found

    # Concatenate the action
    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)

    # Write the action format
    action['format'] = tf.constant(
        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")

    # Convert raw state to our state
    state = step['observation']
    # Concatenate the state
    eef_mat = tf.transpose(tf.reshape(state['state'][8:], (4, 4)))
    eef_pos = eef_mat[:3, 3]
    rotation_matrix = eef_mat[:3, :3]
    eef_ang = rotation_matrix_to_ortho6d(rotation_matrix)
    joint_pos = state['state'][:7]
    grip_open = state['state'][7:8] * 12.5  # rescale to [0, 1]
    state['arm_concat'] = tf.concat([joint_pos, grip_open, eef_pos, eef_ang], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")

    # Clean the task instruction
    # Define the replacements (old, new) as a dictionary
    replacements = {
        '_': ' ',
        '1f': ' ',
        '4f': ' ',
        '-': ' ',
        '50': ' ',
        '55': ' ',
        '56': ' ',
    }
    instr = step['language_instruction']
    instr = clean_task_instruction(instr, replacements)
    step['observation']['natural_language_instruction'] = instr

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod'
    DATASET_NAME = 'austin_buds_dataset_converted_externally_to_rlds'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
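Several of these scripts convert end-effector rotations into the continuous 6D representation via `rotation_matrix_to_ortho6d` from data/utils.py. A sketch of that representation (Zhou et al., CVPR 2019), assuming the common convention of keeping the first two columns of the rotation matrix; the repo's exact row/column layout may differ (docs/test_6drot.py exercises the real implementation):

# Sketch: the 6D rotation representation and its Gram-Schmidt inverse.
import tensorflow as tf

def rotation_matrix_to_ortho6d_sketch(rot: tf.Tensor) -> tf.Tensor:
    """(3, 3) rotation matrix -> (6,) continuous representation."""
    return tf.reshape(tf.transpose(rot[:, :2]), [-1])  # first two basis columns

def ortho6d_to_rotation_matrix_sketch(d6: tf.Tensor) -> tf.Tensor:
    """(6,) -> (3, 3), re-orthonormalized via Gram-Schmidt."""
    a1, a2 = d6[:3], d6[3:]
    b1 = tf.math.l2_normalize(a1)
    b2 = tf.math.l2_normalize(a2 - tf.reduce_sum(b1 * a2) * b1)
    b3 = tf.linalg.cross(b1, b2)
    return tf.stack([b1, b2, b3], axis=1)               # columns b1, b2, b3

Unlike quaternions or Euler angles, this mapping is continuous over the rotation group, which tends to make regression targets better behaved.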
--------------------------------------------------------------------------------
/data/preprocess_scripts/austin_sailor_dataset_converted_externally_to_rlds.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, euler_to_quaternion, \
    quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d


def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
    """
    Convert terminate action to a boolean, where True means terminate.
    """
    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    # Robot action, consists of [3x ee relative pos, 3x ee relative rotation, 1x gripper action].
    action = step['action']
    eef_delta_pos = action[:3]
    eef_ang = action[3:6]
    eef_ang = euler_to_quaternion(eef_ang)
    grip_open = tf.expand_dims(1 - action[6], axis=0)

    # Concatenate the action
    step['action'] = {}
    action = step['action']
    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
    action['terminate'] = step['is_terminal']

    # Write the action format
    action['format'] = tf.constant(
        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")

    # Convert raw state to our state
    state = step['observation']['state']
    joint_pos = step['observation']['state_joint']
    # Default robot state, consists of [3x robot ee pos, 3x ee quat, 1x gripper state].
    eef_pos = state[:3]
    eef_quat = state[3:7]
    eef_ang = quaternion_to_rotation_matrix(eef_quat)
    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
    gripper_joint_pos = state[7:8] * 12.85  # rescale to [0, 1]

    # Concatenate the state
    state = step['observation']
    state['arm_concat'] = tf.concat([joint_pos, gripper_joint_pos, eef_pos, eef_ang], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")

    # Clean the task instruction
    # Define the replacements (old, new) as a dictionary
    replacements = {
        '_': ' ',
        '1f': ' ',
        '4f': ' ',
        '-': ' ',
        '50': ' ',
        '55': ' ',
        '56': ' ',
    }
    instr = step['language_instruction']
    instr = clean_task_instruction(instr, replacements)
    step['observation']['natural_language_instruction'] = instr

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod'
    DATASET_NAME = 'austin_sailor_dataset_converted_externally_to_rlds'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
--------------------------------------------------------------------------------
/data/preprocess_scripts/austin_sirius_dataset_converted_externally_to_rlds.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d


def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
    """
    Convert terminate action to a boolean, where True means terminate.
    """
    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    # Robot action, consists of [3x ee relative pos, 3x ee relative rotation, 1x gripper action].
    action = step['action']
    eef_pos = action[:3]
    eef_ang = action[3:6]
    eef_ang = euler_to_rotation_matrix(eef_ang)
    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
    grip_open = tf.expand_dims(1 - action[6], axis=0)

    # Concatenate the action
    step['action'] = {}
    action = step['action']

    action['arm_concat'] = tf.concat([eef_pos, eef_ang, grip_open], axis=0)
    action['terminate'] = step['is_terminal']

    # Write the action format
    action['format'] = tf.constant(
        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")

    # Convert raw state to our state
    joint_pos = step['observation']['state'][:7]
    grip_open = step['observation']['state'][7:8] * 12.55
    state_ee = step['observation']['state_ee']
    # Tensor (16,): end-effector state, represented as a 4x4 homogeneous transformation matrix of the ee pose.
    transform_matrix = tf.transpose(tf.reshape(state_ee, (4, 4)))
    eef_pos = transform_matrix[:3, 3]
    rotation_matrix = transform_matrix[:3, :3]
    eef_ang = rotation_matrix_to_ortho6d(rotation_matrix)

    # Concatenate the state
    state = step['observation']
    state['arm_concat'] = tf.concat([joint_pos, grip_open, eef_pos, eef_ang], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")

    # Clean the task instruction
    # Define the replacements (old, new) as a dictionary
    replacements = {
        '_': ' ',
        '1f': ' ',
        '4f': ' ',
        '-': ' ',
        '50': ' ',
        '55': ' ',
        '56': ' ',
    }
    instr = step['language_instruction']
    instr = clean_task_instruction(instr, replacements)
    step['observation']['natural_language_instruction'] = instr

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod'
    DATASET_NAME = 'austin_sirius_dataset_converted_externally_to_rlds'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
--------------------------------------------------------------------------------
/data/preprocess_scripts/bc_z.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, euler_to_quaternion, \
    euler_to_rotation_matrix, rotation_matrix_to_ortho6d


def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
    """
    Convert terminate action to a boolean, where True means terminate.
    """
    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    action = step['action']
    # The next 10 actions for the positions. Each action is a 3D delta to add to the current position.
    eef_delta_pos = action['future/xyz_residual'][:3]
    # The next 10 actions for the rotation. Each action is a 3D delta to add to the current axis angle.
    eef_ang = action['future/axis_angle_residual'][:3]
    eef_ang = euler_to_quaternion(eef_ang)
    # The next 10 actions for the gripper. Each action is the value the gripper closure should be changed to (notably it is not a delta.)
    grip_open = tf.cast(tf.expand_dims(1 - action['future/target_close'][0], axis=0), dtype=tf.float32)

    # Concatenate the action
    step['action'] = {}
    action = step['action']

    action['terminate'] = step['is_terminal']
    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)

    # Write the action format
    action['format'] = tf.constant(
        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")

    # Convert raw state to our state
    state = step['observation']

    gripper_ang = state['present/axis_angle']
    gripper_ang = euler_to_rotation_matrix(gripper_ang)
    gripper_ang = rotation_matrix_to_ortho6d(gripper_ang)
    gripper_pos = state['present/xyz']
    # How much the gripper is currently closed. Scaled from 0 to 1, but not all values from 0 to 1 are reachable. The range in the data is about 0.2 to 1
    gripper_open = 1 - state['present/sensed_close']

    # Concatenate the state
    state = step['observation']
    state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")

    # Clean the task instruction
    # Define the replacements (old, new) as a dictionary
    replacements = {
        '_': ' ',
        '1f': ' ',
        '4f': ' ',
        '-': ' ',
        '50': ' ',
        '55': ' ',
        '56': ' ',
    }
    instr = step['observation']['natural_language_instruction']
    instr = clean_task_instruction(instr, replacements)
    step['observation']['natural_language_instruction'] = instr

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod'
    DATASET_NAME = 'bc_z'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
--------------------------------------------------------------------------------
/data/preprocess_scripts/berkeley_autolab_ur5.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, euler_to_quaternion, \
    quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d


def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
    """
    Convert terminate action to a boolean, where True means terminate.
    """
    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)), tf.constant(False), tf.constant(True))


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    action = step['action']
    action['terminate'] = terminate_act_to_bool(action['terminate_episode'])
    eef_delta_pos = action['world_vector']
    eef_ang = action['rotation_delta']
    eef_ang = euler_to_quaternion(eef_ang)

    # Ignore action['gripper_open']: 1 if close gripper, -1 if open gripper, 0 if no change.

    # No base found

    # Concatenate the action
    arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0)
    action['arm_concat'] = arm_action

    # Write the action format
    action['format'] = tf.constant(
        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w")

    # Convert raw state to our state
    state = step['observation']
    # state['robot_state']: [joint0, joint1, joint2, joint3, joint4, joint5, x,y,z, qx,qy,qz,qw, gripper_is_closed, action_blocked]
    robot_state = state['robot_state']
    joint_pos = robot_state[:6]
    eef_pos = robot_state[6:9]
    eef_quat = robot_state[9:13]
    eef_ang = quaternion_to_rotation_matrix(eef_quat)
    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
    # gripper_is_closed is binary: 0 = fully open; 1 = fully closed
    grip_closed = robot_state[13:14]
    grip_open = 1 - grip_closed
    # action_blocked is binary: 0 = not blocked; 1 = blocked
    # action_blocked = robot_state[14:15]

    # Concatenate the state
    state['arm_concat'] = tf.concat([joint_pos, grip_open, eef_pos, eef_ang], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")

    # Clean the task instruction
    # Define the replacements (old, new) as a dictionary
    replacements = {
        '_': ' ',
        '1f': ' ',
        '4f': ' ',
        '-': ' ',
        '50': ' ',
        '55': ' ',
        '56': ' ',
    }
    instr = step['observation']['natural_language_instruction']
    instr = clean_task_instruction(instr, replacements)
    step['observation']['natural_language_instruction'] = instr

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod'
    DATASET_NAME = 'berkeley_autolab_ur5'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
--------------------------------------------------------------------------------
/data/preprocess_scripts/berkeley_cable_routing.py:
--------------------------------------------------------------------------------

import tensorflow as tf

from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d


def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
    """
    Convert terminate action to a boolean, where True means terminate.
    """
    return tf.equal(terminate_act, tf.constant(1.0, dtype=tf.float32))


def process_step(step: dict) -> dict:
    """
    Unify the action format and clean the task instruction.

    DO NOT use python list, use tf.TensorArray instead.
    """
    # Convert raw action to our action
    action = step['action']
    action['terminate'] = terminate_act_to_bool(action['terminate_episode'])

    eef_delta_pos = action['world_vector']
    eef_ang = action['rotation_delta']

    # No gripper_open found
    # No base found

    # Concatenate the action
    arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0)
    action['arm_concat'] = arm_action
    # base_action = tf.concat([base_pos, base_ang], axis=0)
    # action['base_concat'] = base_action

    # Write the action format
    action['format'] = tf.constant(
        "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw")

    # Convert raw state to our state
    state = step['observation']
    eef_pos = state['robot_state'][:3]
    eef_ang = quaternion_to_rotation_matrix(state['robot_state'][3:])
    eef_ang = rotation_matrix_to_ortho6d(eef_ang)

    # Concatenate the state
    state['arm_concat'] = tf.concat([eef_pos, eef_ang], axis=0)

    # Write the state format
    state['format'] = tf.constant(
        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")

    # Define the task instruction
    step['observation']['natural_language_instruction'] = tf.constant(
        "Route cable through the tight-fitting clip mounted on the table.")

    return step


if __name__ == "__main__":
    import tensorflow_datasets as tfds
    from data.utils import dataset_to_path

    DATASET_DIR = 'data/datasets/openx_embod/'
    DATASET_NAME = 'berkeley_cable_routing'
    # Load the dataset
    dataset = tfds.builder_from_directory(
        builder_dir=dataset_to_path(
            DATASET_NAME, DATASET_DIR))
    dataset = dataset.as_dataset(split='all')

    # Inspect the dataset
    for episode in dataset:
        for step in episode['steps']:
            print(step)
11 | """ 12 | # Convert raw action to our action 13 | 14 | origin_action = step['action'] 15 | step['action']={} 16 | action=step['action'] 17 | action['terminate'] = step['is_terminal'] 18 | # 6x end effector delta pose, 1x gripper position 19 | eef_delta_pos = origin_action[:3] 20 | eef_ang=origin_action[3:6] 21 | eef_ang = euler_to_quaternion(eef_ang) 22 | # No base found 23 | 24 | # Concatenate the action 25 | action['arm_concat'] = tf.concat([eef_delta_pos,eef_ang],axis=0) 26 | 27 | # Write the action format 28 | action['format'] = tf.constant( 29 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") 30 | 31 | # Convert raw state to our state 32 | state = step['observation'] 33 | # Concatenate the state 34 | # [6x robot joint angles, 1x gripper open status, 6x robot joint velocities]. 35 | arm_joint_ang=state['state'][:6] 36 | grip_open=1-state['state'][6:7] 37 | # arm_joint_vel=state['state'][7:13] # all zeros 38 | eef_pos = state['end_effector_state'][:3] 39 | eef_ang = quaternion_to_rotation_matrix(state['end_effector_state'][3:]) 40 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 41 | state['arm_concat'] = tf.concat([arm_joint_ang,grip_open,eef_pos,eef_ang],axis=0) 42 | 43 | # Write the state format 44 | state['format'] = tf.constant( 45 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 46 | 47 | # Clean the task instruction 48 | # Define the replacements (old, new) as a dictionary 49 | replacements = { 50 | '_': ' ', 51 | '1f': ' ', 52 | '4f': ' ', 53 | '-': ' ', 54 | '50': ' ', 55 | '55': ' ', 56 | '56': ' ', 57 | 58 | } 59 | instr = step['language_instruction'] 60 | instr = clean_task_instruction(instr, replacements) 61 | step['observation']['natural_language_instruction'] = instr 62 | 63 | return step 64 | 65 | 66 | if __name__ == "__main__": 67 | import tensorflow_datasets as tfds 68 | from data.utils import dataset_to_path 69 | 70 | DATASET_DIR = 'data/datasets/openx_embod' 71 | DATASET_NAME = 'berkeley_fanuc_manipulation' 72 | # Load the dataset 73 | dataset = tfds.builder_from_directory( 74 | builder_dir=dataset_to_path( 75 | DATASET_NAME, DATASET_DIR)) 76 | dataset = dataset.as_dataset(split='all') 77 | 78 | # Inspect the dataset 79 | for episode in dataset: 80 | for step in episode['steps']: 81 | print(step) 82 | -------------------------------------------------------------------------------- /data/preprocess_scripts/berkeley_gnm_cory_hall.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ 4 | rotation_matrix_to_ortho6d 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
11 | """ 12 | # Convert raw action to our action 13 | 14 | origin_action = step['action'] 15 | step['action']={} 16 | action=step['action'] 17 | action['terminate'] = step['is_terminal'] 18 | 19 | eef_pos=tf.cast(origin_action,dtype=tf.float32) 20 | eef_ang=tf.cast(step['action_angle'][2:3],dtype=tf.float32) 21 | eef_ang = euler_to_quaternion(tf.stack([0,0,eef_ang[0]],axis=0)) 22 | # No base found 23 | 24 | # Concatenate the action 25 | action['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) 26 | 27 | # Write the action format 28 | action['format'] = tf.constant( 29 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") 30 | 31 | # Convert raw state to our state 32 | state = step['observation'] 33 | # Concatenate the state 34 | eef_pos=tf.cast(state['position'],dtype=tf.float32) 35 | eef_ang=tf.cast(state['yaw'],dtype=tf.float32) 36 | eef_ang = euler_to_rotation_matrix(tf.stack([0,0,eef_ang[0]],axis=0)) 37 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 38 | state['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) 39 | # Write the state format 40 | state['format'] = tf.constant( 41 | "eef_pos_x,eef_pos_y,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 42 | 43 | # Clean the task instruction 44 | # Define the replacements (old, new) as a dictionary 45 | replacements = { 46 | '_': ' ', 47 | '1f': ' ', 48 | '4f': ' ', 49 | '-': ' ', 50 | '50': ' ', 51 | '55': ' ', 52 | '56': ' ', 53 | 54 | } 55 | instr = step['language_instruction'] 56 | instr = clean_task_instruction(instr, replacements) 57 | step['observation']['natural_language_instruction'] = instr 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'berkeley_gnm_cory_hall' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset: 76 | for step in episode['steps']: 77 | print(step['action'][6:7]) 78 | 79 | -------------------------------------------------------------------------------- /data/preprocess_scripts/berkeley_gnm_recon.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ 4 | rotation_matrix_to_ortho6d 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
11 | """ 12 | # Convert raw action to our action 13 | 14 | origin_action = step['action'] 15 | step['action']={} 16 | action=step['action'] 17 | action['terminate'] = step['is_terminal'] 18 | 19 | eef_pos=tf.cast(origin_action,dtype=tf.float32) 20 | eef_ang=tf.cast(step['action_angle'][2:3],dtype=tf.float32) 21 | eef_ang = euler_to_quaternion(tf.stack([0,0,eef_ang[0]],axis=0)) 22 | # No base found 23 | 24 | # Concatenate the action 25 | action['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) 26 | 27 | # Write the action format 28 | action['format'] = tf.constant( 29 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") 30 | 31 | # Convert raw state to our state 32 | state = step['observation'] 33 | # Concatenate the state 34 | eef_pos=tf.cast(state['position'],dtype=tf.float32) 35 | eef_ang=tf.cast(state['yaw'],dtype=tf.float32) 36 | eef_ang = euler_to_rotation_matrix(tf.stack([0,0,eef_ang[0]],axis=0)) 37 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 38 | state['arm_concat'] = tf.concat([eef_pos/100,eef_ang],axis=0) 39 | # Write the state format 40 | state['format'] = tf.constant( 41 | "eef_pos_x,eef_pos_y,eef_angle_x,eef_angle_y,eef_angle_z,eef_angle_w") 42 | 43 | # Clean the task instruction 44 | # Define the replacements (old, new) as a dictionary 45 | replacements = { 46 | '_': ' ', 47 | '1f': ' ', 48 | '4f': ' ', 49 | '-': ' ', 50 | '50': ' ', 51 | '55': ' ', 52 | '56': ' ', 53 | 54 | } 55 | instr = step['language_instruction'] 56 | instr = clean_task_instruction(instr, replacements) 57 | step['observation']['natural_language_instruction'] = instr 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'berkeley_gnm_recon' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset: 76 | for step in episode['steps']: 77 | print(step['action'][6:7]) 78 | 79 | -------------------------------------------------------------------------------- /data/preprocess_scripts/berkeley_gnm_sac_son.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
11 | """ 12 | # Convert raw action to our action 13 | 14 | origin_action = step['action'] 15 | step['action']={} 16 | action=step['action'] 17 | action['terminate'] = step['is_terminal'] 18 | 19 | eef_pos=tf.cast(origin_action, dtype=tf.float32) 20 | eef_ang=tf.cast(step['action_angle'][2:3], dtype=tf.float32) 21 | eef_ang = euler_to_quaternion(tf.stack([0,0,eef_ang[0]], axis=0)) 22 | # No base found 23 | 24 | # Concatenate the action 25 | action['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) 26 | 27 | # Write the action format 28 | action['format'] = tf.constant( 29 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") 30 | 31 | # Convert raw state to our state 32 | state = step['observation'] 33 | # Concatenate the state 34 | eef_pos=tf.cast(state['position'],dtype=tf.float32) 35 | eef_ang=tf.cast(state['yaw'],dtype=tf.float32) 36 | eef_ang = euler_to_rotation_matrix(tf.stack([0,0,eef_ang[0]],axis=0)) 37 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 38 | state['arm_concat'] = tf.concat([eef_pos/100,eef_ang],axis=0) 39 | # Write the state format 40 | state['format'] = tf.constant( 41 | "eef_pos_x,eef_pos_y,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 42 | 43 | # Clean the task instruction 44 | # Define the replacements (old, new) as a dictionary 45 | replacements = { 46 | '_': ' ', 47 | '1f': ' ', 48 | '4f': ' ', 49 | '-': ' ', 50 | '50': ' ', 51 | '55': ' ', 52 | '56': ' ', 53 | 54 | } 55 | instr = step['language_instruction'] 56 | instr = clean_task_instruction(instr, replacements) 57 | step['observation']['natural_language_instruction'] = instr 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'berkeley_gnm_sac_son' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset: 76 | for step in episode['steps']: 77 | print(step['action'][6:7]) 78 | 79 | -------------------------------------------------------------------------------- /data/preprocess_scripts/berkeley_mvp_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | # Robot action, consists of [7 delta joint pos,1x gripper binary state]. 
23 | delta_joint_pos = action[:7] 24 | grip_open = tf.expand_dims(1 - action[7], axis=0) 25 | 26 | # Concatenate the action 27 | # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 28 | step['action'] = {} 29 | action = step['action'] 30 | action['arm_concat'] = tf.concat([delta_joint_pos, grip_open], axis=0) 31 | action['terminate'] = step['is_terminal'] 32 | 33 | # Write the action format 34 | action['format'] = tf.constant( 35 | "arm_joint_0_delta_pos,arm_joint_1_delta_pos,arm_joint_2_delta_pos,arm_joint_3_delta_pos,arm_joint_4_delta_pos,arm_joint_5_delta_pos,arm_joint_6_delta_pos,gripper_open") 36 | 37 | # Convert raw state to our state 38 | state = step['observation'] 39 | # xArm joint positions (7 DoF). 40 | arm_joint_pos = state['joint_pos'] 41 | # Binary gripper state (1 - closed, 0 - open) 42 | grip_open = tf.expand_dims(1 - tf.cast(state['gripper'],dtype=tf.float32), axis=0) 43 | # Gripper pose, robot frame, [3 position, 4 rotation] 44 | gripper_pose = state['pose'][:3] 45 | # gripper_ang = quaternion_to_euler(state['pose'][3:7]) 46 | gripper_ang = quaternion_to_rotation_matrix(state['pose'][3:7]) 47 | gripper_ang = rotation_matrix_to_ortho6d(gripper_ang) 48 | 49 | # Concatenate the state 50 | state['arm_concat'] = tf.concat([arm_joint_pos, gripper_pose,gripper_ang, grip_open], axis=0) 51 | 52 | # Write the state format 53 | state['format'] = tf.constant( 54 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 55 | 56 | # Clean the task instruction 57 | # Define the replacements (old, new) as a dictionary 58 | replacements = { 59 | '_': ' ', 60 | '1f': ' ', 61 | '4f': ' ', 62 | '-': ' ', 63 | '50': ' ', 64 | '55': ' ', 65 | '56': ' ', 66 | 67 | } 68 | instr = step['language_instruction'] 69 | instr = clean_task_instruction(instr, replacements) 70 | step['observation']['natural_language_instruction'] = instr 71 | 72 | return step 73 | 74 | 75 | if __name__ == "__main__": 76 | import tensorflow_datasets as tfds 77 | from data.utils import dataset_to_path 78 | 79 | DATASET_DIR = 'data/datasets/openx_embod' 80 | DATASET_NAME = 'berkeley_mvp_converted_externally_to_rlds' 81 | # Load the dataset 82 | dataset = tfds.builder_from_directory( 83 | builder_dir=dataset_to_path( 84 | DATASET_NAME, DATASET_DIR)) 85 | dataset = dataset.as_dataset(split='all') 86 | 87 | # Inspect the dataset 88 | for episode in dataset: 89 | for step in episode['steps']: 90 | print(step) 91 | -------------------------------------------------------------------------------- /data/preprocess_scripts/berkeley_rpt_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 18 | """ 19 | # Convert raw action to our action 20 | action = step['action'] 21 | # Robot action, consists of [7 delta joint pos,1x gripper binary state].
22 | delta_joint_pos = action[:7] 23 | grip_open = tf.expand_dims(1 - action[7], axis=0) 24 | 25 | # Concatenate the action 26 | # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 27 | step['action'] = {} 28 | action = step['action'] 29 | action['arm_concat'] = tf.concat([delta_joint_pos, grip_open], axis=0) 30 | action['terminate'] = step['is_terminal'] 31 | 32 | # Write the action format 33 | action['format'] = tf.constant( 34 | "arm_joint_0_delta_pos,arm_joint_1_delta_pos,arm_joint_2_delta_pos,arm_joint_3_delta_pos,arm_joint_4_delta_pos,arm_joint_5_delta_pos,arm_joint_6_delta_pos,gripper_open") 35 | 36 | # Convert raw state to our state 37 | state = step['observation'] 38 | # xArm joint positions (7 DoF). 39 | arm_joint_pos = state['joint_pos'] 40 | # Binary gripper state (1 - closed, 0 - open) 41 | grip_open = tf.expand_dims(1 - tf.cast(state['gripper'],dtype=tf.float32), axis=0) 42 | 43 | # Concatenate the state 44 | state['arm_concat'] = tf.concat([arm_joint_pos, grip_open], axis=0) 45 | 46 | # Write the state format 47 | state['format'] = tf.constant( 48 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open") 49 | 50 | # Clean the task instruction 51 | # Define the replacements (old, new) as a dictionary 52 | replacements = { 53 | '_': ' ', 54 | '1f': ' ', 55 | '4f': ' ', 56 | '-': ' ', 57 | '50': ' ', 58 | '55': ' ', 59 | '56': ' ', 60 | 61 | } 62 | instr = step['language_instruction'] 63 | instr = clean_task_instruction(instr, replacements) 64 | step['observation']['natural_language_instruction'] = instr 65 | 66 | return step 67 | 68 | 69 | if __name__ == "__main__": 70 | import tensorflow_datasets as tfds 71 | from data.utils import dataset_to_path 72 | 73 | DATASET_DIR = 'data/datasets/openx_embod' 74 | DATASET_NAME = 'berkeley_rpt_converted_externally_to_rlds' 75 | # Load the dataset 76 | dataset = tfds.builder_from_directory( 77 | builder_dir=dataset_to_path( 78 | DATASET_NAME, DATASET_DIR)) 79 | dataset = dataset.as_dataset(split='all') 80 | 81 | # Inspect the dataset 82 | for episode in dataset: 83 | for step in episode['steps']: 84 | print(step) 85 | -------------------------------------------------------------------------------- /data/preprocess_scripts/bridge.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.equal(terminate_act, tf.constant(1.0, dtype=tf.float32)) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead.
19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 23 | 24 | eef_delta_pos = action['world_vector'] 25 | eef_ang=action['rotation_delta'] 26 | eef_ang = euler_to_quaternion(eef_ang) 27 | grip_open=tf.reshape(tf.where(action['open_gripper'],1.0, 0.0),(1,)) 28 | # grip_open:tensor 29 | 30 | # No base found 31 | 32 | # Concatenate the action 33 | arm_action=tf.concat([eef_delta_pos,eef_ang,grip_open],axis=0) 34 | action['arm_concat']=arm_action 35 | #base_action = tf.concat([base_delta_pos, base_delta_ang], axis=0) 36 | #action['base_concat'] = base_action 37 | 38 | # Write the action format 39 | action['format']=tf.constant("eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 40 | 41 | # Convert raw state to our state 42 | state= step['observation'] 43 | eef_pos=state['state'][:3] 44 | eef_ang=state['state'][3:6] 45 | eef_ang = euler_to_rotation_matrix(eef_ang) 46 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 47 | gripper_action=state['state'][6:] 48 | 49 | # Concatenate the state 50 | state['arm_concat']=tf.concat([eef_pos,eef_ang,gripper_action],axis=0) 51 | 52 | # Write the state format 53 | state['format'] = tf.constant( 54 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 55 | 56 | # Clean the task instruction 57 | # Define the replacements (old, new) as a dictionary 58 | replacements = { 59 | '_': ' ', 60 | '1f': ' ', 61 | '4f': ' ', 62 | '-': ' ', 63 | '50': ' ', 64 | '55': ' ', 65 | '56': ' ', 66 | 67 | } 68 | instr = step['observation']['natural_language_instruction'] 69 | instr = clean_task_instruction(instr, replacements) 70 | step['observation']['natural_language_instruction'] = instr 71 | 72 | return step 73 | 74 | 75 | if __name__ == "__main__": 76 | import tensorflow_datasets as tfds 77 | from data.utils import dataset_to_path 78 | 79 | DATASET_DIR = 'data/datasets/openx_embod/' 80 | DATASET_NAME = 'bridge' 81 | # Load the dataset 82 | dataset = tfds.builder_from_directory( 83 | builder_dir=dataset_to_path( 84 | DATASET_NAME, DATASET_DIR)) 85 | dataset = dataset.as_dataset(split='all') 86 | 87 | # Inspect the dataset 88 | for episode in dataset: 89 | for step in episode['steps']: 90 | print(step) 91 | -------------------------------------------------------------------------------- /data/preprocess_scripts/cmu_franka_exploration_dataset_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler,euler_to_quaternion 4 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 5 | """ 6 | Convert terminate action to a boolean, where True means terminate. 7 | """ 8 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 9 | 10 | def process_step(step: dict) -> dict: 11 | """ 12 | Unify the action format and clean the task instruction. 13 | 14 | DO NOT use python list, use tf.TensorArray instead. 
15 | """ 16 | # Convert raw action to our action 17 | 18 | origin_action = step['action'] 19 | step['action']={} 20 | action=step['action'] 21 | action['terminate']=terminate_act_to_bool(origin_action[7]) 22 | 23 | # gripper_open: 1-open, 0-closed 24 | 25 | eef_pos=origin_action[:3] 26 | eef_ang=origin_action[3:6] 27 | eef_ang = euler_to_quaternion(eef_ang) 28 | grip_open=origin_action[6:7] 29 | # No base found 30 | 31 | # Concatenate the action 32 | action['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_open],axis=0) 33 | 34 | # Write the action format 35 | action['format'] = tf.constant( 36 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 37 | 38 | # No state found 39 | 40 | # Clean the task instruction 41 | # Define the replacements (old, new) as a dictionary 42 | replacements = { 43 | '_': ' ', 44 | '1f': ' ', 45 | '4f': ' ', 46 | '-': ' ', 47 | '50': ' ', 48 | '55': ' ', 49 | '56': ' ', 50 | 51 | } 52 | instr = step['language_instruction'] 53 | instr = clean_task_instruction(instr, replacements) 54 | step['observation']['natural_language_instruction'] = instr 55 | 56 | return step 57 | 58 | 59 | if __name__ == "__main__": 60 | import tensorflow_datasets as tfds 61 | from data.utils import dataset_to_path 62 | 63 | DATASET_DIR = 'data/datasets/openx_embod' 64 | DATASET_NAME = 'cmu_franka_exploration_dataset_converted_externally_to_rlds' 65 | # Load the dataset 66 | dataset = tfds.builder_from_directory( 67 | builder_dir=dataset_to_path( 68 | DATASET_NAME, DATASET_DIR)) 69 | dataset = dataset.as_dataset(split='all') 70 | 71 | # Inspect the dataset 72 | for episode in dataset: 73 | for step in episode['steps']: 74 | print(step['action'][6:7]) 75 | 76 | -------------------------------------------------------------------------------- /data/preprocess_scripts/cmu_play_fusion.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler 4 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 5 | """ 6 | Convert terminate action to a boolean, where True means terminate. 7 | """ 8 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 9 | 10 | def process_step(step: dict) -> dict: 11 | """ 12 | Unify the action format and clean the task instruction. 13 | 14 | DO NOT use python list, use tf.TensorArray instead. 
15 | """ 16 | # Convert raw action to our action 17 | 18 | origin_action = step['action'] 19 | step['action']={} 20 | action=step['action'] 21 | action['terminate']=terminate_act_to_bool(origin_action[8]) 22 | 23 | 24 | eef_pos=origin_action[:3] 25 | # eef_ang=quaternion_to_euler(origin_action[3:7]) 26 | eef_ang = origin_action[3:7] 27 | grip_open=origin_action[7:8] 28 | # No base found 29 | 30 | # Concatenate the action 31 | action['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_open],axis=0) 32 | 33 | # Write the action format 34 | action['format'] = tf.constant( 35 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 36 | 37 | # Convert raw state to our state 38 | state = step['observation'] 39 | # Concatenate the state 40 | arm_joint_ang=state['state'][:7] 41 | grip_open=state['state'][7:8] * 11.765 # rescale to [0, 1] 42 | state['arm_concat'] = tf.concat([arm_joint_ang,grip_open],axis=0) 43 | # Write the state format 44 | state['format'] = tf.constant( 45 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos") 46 | 47 | # Clean the task instruction 48 | # Define the replacements (old, new) as a dictionary 49 | replacements = { 50 | '_': ' ', 51 | '1f': ' ', 52 | '4f': ' ', 53 | '-': ' ', 54 | '50': ' ', 55 | '55': ' ', 56 | '56': ' ', 57 | 58 | } 59 | instr = step['language_instruction'] 60 | instr = clean_task_instruction(instr, replacements) 61 | step['observation']['natural_language_instruction'] = instr 62 | 63 | return step 64 | 65 | 66 | if __name__ == "__main__": 67 | import tensorflow_datasets as tfds 68 | from data.utils import dataset_to_path 69 | 70 | DATASET_DIR = 'data/datasets/openx_embod' 71 | DATASET_NAME = 'cmu_play_fusion' 72 | # Load the dataset 73 | dataset = tfds.builder_from_directory( 74 | builder_dir=dataset_to_path( 75 | DATASET_NAME, DATASET_DIR)) 76 | dataset = dataset.as_dataset(split='all') 77 | 78 | # Inspect the dataset 79 | for episode in dataset: 80 | for step in episode['steps']: 81 | print(step['action'][6:7]) 82 | 83 | -------------------------------------------------------------------------------- /data/preprocess_scripts/cmu_stretch.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler,euler_to_quaternion 4 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 5 | """ 6 | Convert terminate action to a boolean, where True means terminate. 7 | """ 8 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 9 | 10 | def process_step(step: dict) -> dict: 11 | """ 12 | Unify the action format and clean the task instruction. 13 | 14 | DO NOT use python list, use tf.TensorArray instead. 
15 | """ 16 | # Convert raw action to our action 17 | 18 | origin_action = step['action'] 19 | step['action']={} 20 | action=step['action'] 21 | action['terminate']=terminate_act_to_bool(origin_action[7]) 22 | 23 | 24 | eef_pos=origin_action[:3] 25 | eef_ang=origin_action[3:6] 26 | eef_ang = euler_to_quaternion(eef_ang) 27 | grip_open=origin_action[6:7] 28 | # No base found 29 | 30 | # Concatenate the action 31 | action['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_open],axis=0) 32 | 33 | # Write the action format 34 | action['format'] = tf.constant( 35 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 36 | 37 | # Convert raw state to our state 38 | state = step['observation'] 39 | # Concatenate the state 40 | eef_pos_x = state['state'][0:1] 41 | eef_pos_z = state['state'][2:3] 42 | grip_open = state['state'][3:4] 43 | state['arm_concat'] = tf.concat( 44 | [eef_pos_x, eef_pos_z, grip_open], axis=0) 45 | # Write the state format 46 | state['format'] = tf.constant( 47 | "eef_pos_x,eef_pos_z,gripper_open") 48 | 49 | # Clean the task instruction 50 | # Define the replacements (old, new) as a dictionary 51 | replacements = { 52 | '_': ' ', 53 | '1f': ' ', 54 | '4f': ' ', 55 | '-': ' ', 56 | '50': ' ', 57 | '55': ' ', 58 | '56': ' ', 59 | 60 | } 61 | instr = step['language_instruction'] 62 | instr = clean_task_instruction(instr, replacements) 63 | step['observation']['natural_language_instruction'] = instr 64 | 65 | return step 66 | 67 | 68 | if __name__ == "__main__": 69 | import tensorflow_datasets as tfds 70 | from data.utils import dataset_to_path 71 | 72 | DATASET_DIR = 'data/datasets/openx_embod' 73 | DATASET_NAME = 'cmu_stretch' 74 | # Load the dataset 75 | dataset = tfds.builder_from_directory( 76 | builder_dir=dataset_to_path( 77 | DATASET_NAME, DATASET_DIR)) 78 | dataset = dataset.as_dataset(split='all') 79 | 80 | # Inspect the dataset 81 | for episode in dataset: 82 | for step in episode['steps']: 83 | print(step['action'][6:7]) 84 | 85 | -------------------------------------------------------------------------------- /data/preprocess_scripts/columbia_cairlab_pusht_real.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 18 | """ 19 | # Convert raw action to our action 20 | action = step['action'] 21 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 22 | 23 | eef_delta_pos = action['world_vector'] 24 | eef_ang = action['rotation_delta'] 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | 27 | # Ignore action['gripper_open']: 1 if close gripper, -1 if open gripper, 0 if no change. 
28 | 29 | # No base found 30 | 31 | # Concatenate the action 32 | arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0) 33 | action['arm_concat'] = arm_action 34 | 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") 38 | 39 | # Convert raw state to our state 40 | state = step['observation'] 41 | eef_pos = state['robot_state'] 42 | 43 | # Concatenate the state 44 | state['arm_concat'] = eef_pos 45 | 46 | # Write the state format 47 | state['format'] = tf.constant( 48 | "eef_pos_x,eef_pos_y") 49 | 50 | # Clean the task instruction 51 | # Define the replacements (old, new) as a dictionary 52 | replacements = { 53 | '_': ' ', 54 | '1f': ' ', 55 | '4f': ' ', 56 | '-': ' ', 57 | '50': ' ', 58 | '55': ' ', 59 | '56': ' ', 60 | 61 | } 62 | instr = step['observation']['natural_language_instruction'] 63 | instr = clean_task_instruction(instr, replacements) 64 | step['observation']['natural_language_instruction'] = instr 65 | 66 | return step 67 | 68 | 69 | if __name__ == "__main__": 70 | import tensorflow_datasets as tfds 71 | from data.utils import dataset_to_path 72 | 73 | DATASET_DIR = 'data/datasets/openx_embod' 74 | DATASET_NAME = 'columbia_cairlab_pusht_real' 75 | # Load the dataset 76 | dataset = tfds.builder_from_directory( 77 | builder_dir=dataset_to_path( 78 | DATASET_NAME, DATASET_DIR)) 79 | dataset = dataset.as_dataset(split='all') 80 | 81 | # Inspect the dataset 82 | for episode in dataset: 83 | for step in episode['steps']: 84 | print(step) 85 | -------------------------------------------------------------------------------- /data/preprocess_scripts/dlr_edan_shared_control_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | # Robot action, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy")]. 23 | eef_delta_pos = action[:3] 24 | eef_ang = tf.gather(action[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | # Inspection of the episodes shows that the last action dimension is a gripper open/close command: 0 = open, 1 = close.
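The `tf.gather(..., [1, 2, 0])` just above rearranges the stored orientation components. The dataset card describes them in scipy `Rotation.as_euler("zxy")` output order, i.e. (z, x, y); the gather moves them into (x, y, z) slots before `euler_to_quaternion` (assumed to expect XYZ order) is applied. The index mapping in isolation:

```python
import tensorflow as tf

angles_zxy = tf.constant([0.3, 0.1, 0.2])      # stored as (z, x, y)
angles_xyz = tf.gather(angles_zxy, [1, 2, 0])  # reordered to (x, y, z)
print(angles_xyz.numpy())                      # [0.1 0.2 0.3]
```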
27 | grip_open = tf.expand_dims(1 - action[6], axis=0) 28 | 29 | # Concatenate the action 30 | step['action'] = {} 31 | action = step['action'] 32 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,grip_open], axis=0) 33 | action['terminate'] = step['is_terminal'] 34 | 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 38 | 39 | # Convert raw state to our state 40 | state = step['observation'] 41 | state_vec = state['state'] 42 | # Robot state, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy"), 6x robot EEF wrench]. 43 | eef_pos = state_vec[:3] 44 | eef_ang = tf.gather(state_vec[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") 45 | eef_ang = euler_to_rotation_matrix(eef_ang) 46 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 47 | grip_open = tf.expand_dims(1 - state_vec[6], axis=0) 48 | # Concatenate the state 49 | state['arm_concat'] = tf.concat([eef_pos, eef_ang,grip_open], axis=0) 50 | 51 | # Write the state format 52 | state['format'] = tf.constant( 53 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 54 | 55 | # Clean the task instruction 56 | # Define the replacements (old, new) as a dictionary 57 | replacements = { 58 | '_': ' ', 59 | '1f': ' ', 60 | '4f': ' ', 61 | '-': ' ', 62 | '50': ' ', 63 | '55': ' ', 64 | '56': ' ', 65 | 66 | } 67 | instr = step['language_instruction'] 68 | instr = clean_task_instruction(instr, replacements) 69 | step['observation']['natural_language_instruction'] = instr 70 | 71 | return step 72 | 73 | 74 | if __name__ == "__main__": 75 | import tensorflow_datasets as tfds 76 | from data.utils import dataset_to_path 77 | 78 | DATASET_DIR = 'data/datasets/openx_embod' 79 | DATASET_NAME = 'dlr_edan_shared_control_converted_externally_to_rlds' 80 | # Load the dataset 81 | dataset = tfds.builder_from_directory( 82 | builder_dir=dataset_to_path( 83 | DATASET_NAME, DATASET_DIR)) 84 | dataset = dataset.as_dataset(split='all') 85 | 86 | # Inspect the dataset 87 | for episode in dataset: 88 | for step in episode['steps']: 89 | print(step) 90 | -------------------------------------------------------------------------------- /data/preprocess_scripts/dlr_sara_grid_clamp_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | # Robot action, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy")].
23 | eef_delta_pos = action[:3] 24 | eef_ang = tf.gather(action[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | # Inspection of the episodes shows that the last action dimension is a gripper open/close command: 0 = open, 1 = close. 27 | grip_open = tf.expand_dims(1 - action[6], axis=0) 28 | 29 | # Concatenate the action 30 | step['action'] = {} 31 | action = step['action'] 32 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,grip_open], axis=0) 33 | action['terminate'] = step['is_terminal'] 34 | 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 38 | 39 | # Convert raw state to our state 40 | state = step['observation'] 41 | state_vec = state['state'] 42 | # Robot state, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy"), 6x robot EEF wrench]. 43 | eef_pos = state_vec[:3] 44 | eef_ang = tf.gather(state_vec[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") 45 | eef_ang = euler_to_rotation_matrix(eef_ang) 46 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 47 | # joint_pos = state_vec[6:12] # The EEF wrench is not the joint positions 48 | # Concatenate the state 49 | # state['arm_concat'] = tf.concat([joint_pos,eef_pos, eef_ang], axis=0) 50 | state['arm_concat'] = tf.concat([eef_pos, eef_ang], axis=0) 51 | 52 | # Write the state format 53 | state['format'] = tf.constant( 54 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 55 | 56 | # Define the task instruction 57 | step['observation']['natural_language_instruction'] = tf.constant( 58 | "Place the grid clamp on the table, aligning the protrusion on its bottom with the slots on the table.") 59 | 60 | return step 61 | 62 | 63 | if __name__ == "__main__": 64 | import tensorflow_datasets as tfds 65 | from data.utils import dataset_to_path 66 | 67 | DATASET_DIR = 'data/datasets/openx_embod' 68 | DATASET_NAME = 'dlr_sara_grid_clamp_converted_externally_to_rlds' 69 | # Load the dataset 70 | dataset = tfds.builder_from_directory( 71 | builder_dir=dataset_to_path( 72 | DATASET_NAME, DATASET_DIR)) 73 | dataset = dataset.as_dataset(split='all') 74 | 75 | # Inspect the dataset 76 | for episode in dataset: 77 | for step in episode['steps']: 78 | print(step) 79 | -------------------------------------------------------------------------------- /data/preprocess_scripts/dlr_sara_pour_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | # Robot action, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy")].
23 | eef_delta_pos = action[:3] 24 | eef_ang = tf.gather(action[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | # Inspection of the episodes shows that the last action dimension is a gripper open/close command: 0 = open, 1 = close. 27 | grip_open = tf.expand_dims(1 - action[6], axis=0) 28 | # Concatenate the action 29 | step['action'] = {} 30 | action = step['action'] 31 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,grip_open], axis=0) 32 | action['terminate'] = step['is_terminal'] 33 | 34 | # Write the action format 35 | action['format'] = tf.constant( 36 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 37 | 38 | # Convert raw state to our state 39 | state = step['observation'] 40 | state_vec = state['state'] 41 | # Robot state, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy")]. 42 | eef_pos = state_vec[:3] 43 | eef_ang = tf.gather(state_vec[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") 44 | eef_ang = euler_to_rotation_matrix(eef_ang) 45 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 46 | 47 | # Concatenate the state 48 | state['arm_concat'] = tf.concat([eef_pos, eef_ang], axis=0) 49 | 50 | # Write the state format 51 | state['format'] = tf.constant( 52 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 53 | 54 | # Clean the task instruction 55 | # Define the replacements (old, new) as a dictionary 56 | replacements = { 57 | '_': ' ', 58 | '1f': ' ', 59 | '4f': ' ', 60 | '-': ' ', 61 | '50': ' ', 62 | '55': ' ', 63 | '56': ' ', 64 | 65 | } 66 | instr = step['language_instruction'] 67 | instr = clean_task_instruction(instr, replacements) 68 | step['observation']['natural_language_instruction'] = instr 69 | 70 | return step 71 | 72 | 73 | if __name__ == "__main__": 74 | import tensorflow_datasets as tfds 75 | from data.utils import dataset_to_path 76 | 77 | DATASET_DIR = 'data/datasets/openx_embod' 78 | DATASET_NAME = 'dlr_sara_pour_converted_externally_to_rlds' 79 | # Load the dataset 80 | dataset = tfds.builder_from_directory( 81 | builder_dir=dataset_to_path( 82 | DATASET_NAME, DATASET_DIR)) 83 | dataset = dataset.as_dataset(split='all') 84 | 85 | # Inspect the dataset 86 | for episode in dataset: 87 | for step in episode['steps']: 88 | print(step) 89 | -------------------------------------------------------------------------------- /data/preprocess_scripts/dobbe.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d 4 | 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 11 | """ 12 | # Convert raw action to our action 13 | arm_action = step['action'] 14 | 15 | # Concatenate the action 16 | step['action'] = {} 17 | action = step['action'] 18 | action['arm_concat'] = arm_action 19 | # Write the action format 20 | action['format'] = tf.constant( 21 | "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_open") 22 | action['terminate'] = step['is_terminal'] 23 | 24 | # Convert raw state to our state
25 | state = step['observation'] 26 | eef_pos = state['xyz'] 27 | # Clip eef_pos to be [-10, 10] for stability 28 | eef_pos = tf.clip_by_value(eef_pos, -10, 10) 29 | eef_ang = state['rot'] 30 | eef_ang = euler_to_rotation_matrix(eef_ang) 31 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 32 | grip_pos = state['gripper'] 33 | 34 | # Concatenate the state 35 | state['arm_concat'] = tf.concat([ 36 | grip_pos,eef_pos,eef_ang], axis=0) 37 | 38 | # Write the state format 39 | state['format'] = tf.constant( 40 | "gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 41 | 42 | # Clean the task instruction 43 | # Define the replacements (old, new) as a dictionary 44 | replacements = { 45 | '_': ' ', 46 | '1f': ' ', 47 | '4f': ' ', 48 | '-': ' ', 49 | '50': ' ', 50 | '55': ' ', 51 | '56': ' ', 52 | } 53 | instr = step['language_instruction'] 54 | instr = clean_task_instruction(instr, replacements) 55 | step['observation']['natural_language_instruction'] = instr 56 | 57 | return step 58 | 59 | 60 | if __name__ == "__main__": 61 | import tensorflow_datasets as tfds 62 | from data.utils import dataset_to_path 63 | from tqdm import tqdm 64 | import numpy as np 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'dobbe' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | # dataset = dataset.filter( 74 | # lambda x: tf.math.less( 75 | # tf.math.reduce_max(tf.math.abs( 76 | # tf.convert_to_tensor( 77 | # [step['observation']['xyz'] for step in x['steps']]))), 3)) 78 | 79 | # Inspect the dataset 80 | for i, episode in tqdm(enumerate(dataset), total=5208): 81 | res = [] 82 | for step in episode['steps']: 83 | res.append(step['observation']['xyz'].numpy()) 84 | max_val = np.max(np.abs(res)) 85 | if max_val > 2: 86 | print(f"Episode {i} has a max value of {max_val}") 87 | -------------------------------------------------------------------------------- /data/preprocess_scripts/droid.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d 4 | 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
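dobbe.py above clips `xyz` to [-10, 10] for stability and its `__main__` block scans for outlier episodes, with a dataset-level filter left commented out. A sketch of what that filter could look like, assuming (as in the inspection loop) that each episode nests a `steps` sub-dataset with a float32 `observation`/`xyz` field:

```python
import tensorflow as tf

def episode_max_abs_xyz(episode: dict) -> tf.Tensor:
    # Reduce over the nested steps dataset to the episode's max |xyz| value.
    xyz = episode['steps'].map(lambda s: tf.reduce_max(tf.abs(s['observation']['xyz'])))
    return xyz.reduce(tf.constant(0.0), tf.maximum)

# dataset = dataset.filter(lambda ep: episode_max_abs_xyz(ep) < 3.0)
```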
11 | """ 12 | # Convert raw action to our action 13 | action_dict = step['action_dict'] 14 | 15 | # Robot action 16 | eef_pos = action_dict['cartesian_position'][:3] 17 | eef_ang = action_dict['cartesian_position'][3:6] 18 | eef_ang = euler_to_rotation_matrix(eef_ang) 19 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 20 | eef_pos_vel = action_dict['cartesian_velocity'][:3] 21 | eef_ang_vel = action_dict['cartesian_velocity'][3:6] 22 | joint_pos = action_dict['joint_position'] 23 | joint_vel = action_dict['joint_velocity'] 24 | grip_pos = action_dict['gripper_position'] 25 | grip_vel = action_dict['gripper_velocity'] 26 | 27 | # Concatenate the action 28 | step['action'] = {} 29 | action = step['action'] 30 | 31 | arm_action = tf.concat([eef_pos, eef_ang, eef_pos_vel, eef_ang_vel, joint_pos, joint_vel, grip_pos, grip_vel], axis=0) 32 | action['arm_concat'] = arm_action 33 | action['terminate'] = step['is_terminal'] 34 | 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,gripper_joint_0_vel") 38 | 39 | # Convert raw state to our state 40 | # Robot state 41 | state = step['observation'] 42 | eef_pos = state['cartesian_position'][:3] 43 | eef_ang = state['cartesian_position'][3:6] 44 | eef_ang = euler_to_rotation_matrix(eef_ang) 45 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 46 | joint_pos = state['joint_position'] 47 | grip_pos = 1 - state['gripper_position'] 48 | 49 | # Concatenate the state 50 | state['arm_concat'] = tf.concat([ 51 | joint_pos,grip_pos,eef_pos,eef_ang], axis=0) 52 | 53 | 54 | # Write the state format 55 | state['format'] = tf.constant( 56 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 57 | 58 | # Clean the task instruction 59 | # Define the replacements (old, new) as a dictionary 60 | replacements = { 61 | '_': ' ', 62 | '1f': ' ', 63 | '4f': ' ', 64 | '-': ' ', 65 | '50': ' ', 66 | '55': ' ', 67 | '56': ' ', 68 | 69 | } 70 | instr = step['language_instruction'] 71 | instr = clean_task_instruction(instr, replacements) 72 | step['observation']['natural_language_instruction'] = instr 73 | 74 | return step 75 | 76 | 77 | if __name__ == "__main__": 78 | pass 79 | -------------------------------------------------------------------------------- /data/preprocess_scripts/eth_agent_affordances.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d 4 | 5 | def process_step(step: dict) -> dict: 6 | """ 7 | Unify the action format and clean the task instruction. 8 | 9 | DO NOT use python list, use tf.TensorArray instead. 
10 | """ 11 | # Convert raw action to our action 12 | 13 | origin_action = step['action'] 14 | step['action']={} 15 | action=step['action'] 16 | action['terminate'] = step['is_terminal'] 17 | 18 | eef_vel = origin_action[:3] 19 | eef_ang_vel=origin_action[3:6] 20 | # No base found 21 | 22 | # Concatenate the action 23 | action['arm_concat'] = tf.concat([eef_vel,eef_ang_vel],axis=0) 24 | 25 | # Write the action format 26 | action['format'] = tf.constant( 27 | "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw") 28 | 29 | # Convert raw state to our state 30 | state = step['observation'] 31 | # Concatenate the state 32 | eef_pos = state['state'][:3] 33 | eef_ang = tf.gather(state['state'][3:6], [2, 1, 0]) 34 | eef_ang = euler_to_rotation_matrix(eef_ang) 35 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 36 | grip_open=state['state'][6:7] 37 | # state['state'][8]:door opening angle 38 | state['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_open],axis=0) 39 | 40 | # Write the state format 41 | state['format'] = tf.constant( 42 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 43 | # Clean the task instruction 44 | # Define the replacements (old, new) as a dictionary 45 | replacements = { 46 | '_': ' ', 47 | '1f': ' ', 48 | '4f': ' ', 49 | '-': ' ', 50 | '50': ' ', 51 | '55': ' ', 52 | '56': ' ', 53 | 54 | } 55 | instr = step['language_instruction'] 56 | instr = clean_task_instruction(instr, replacements) 57 | step['observation']['natural_language_instruction'] = instr 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'eth_agent_affordances' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset: 76 | for step in episode['steps']: 77 | print(step) 78 | -------------------------------------------------------------------------------- /data/preprocess_scripts/fmb.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d, \ 4 | quaternion_to_rotation_matrix 5 | 6 | 7 | def process_step(step: dict) -> dict: 8 | """ 9 | Unify the action format and clean the task instruction. 10 | 11 | DO NOT use python list, use tf.TensorArray instead. 
12 | """ 13 | # Convert raw action to our action 14 | action = step['action'] 15 | 16 | # Robot action 17 | eef_pos = action[:3] 18 | eef_ang = action[3:6] 19 | eef_ang = euler_to_rotation_matrix(eef_ang) 20 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 21 | gripper_open = action[6:7] 22 | 23 | # Concatenate the action 24 | step['action'] = {} 25 | action = step['action'] 26 | 27 | arm_action = tf.concat([eef_pos, eef_ang, gripper_open], axis=0) 28 | action['arm_concat'] = arm_action 29 | action['terminate'] = step['is_terminal'] 30 | 31 | # Write the action format 32 | action['format'] = tf.constant( 33 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 34 | 35 | # Convert raw state to our state 36 | # Robot state 37 | state = step['observation'] 38 | eef_pos = state['eef_pose'][:3] 39 | eef_ang = state['eef_pose'][3:] 40 | eef_ang = quaternion_to_rotation_matrix(eef_ang) 41 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 42 | eef_pos_vel = state['eef_vel'][:3] 43 | eef_ang_vel = state['eef_vel'][3:] 44 | joint_pos = state['joint_pos'] 45 | joint_vel = state['joint_vel'] 46 | grip_pos = 1 - state['state_gripper_pose'] 47 | grip_pos = tf.expand_dims(grip_pos, axis=0) 48 | 49 | # Concatenate the state 50 | state['arm_concat'] = tf.concat([ 51 | joint_pos,joint_vel,grip_pos,eef_pos,eef_ang,eef_pos_vel,eef_ang_vel], axis=0) 52 | 53 | # Write the state format 54 | state['format'] = tf.constant( 55 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw") 56 | 57 | # Clean the task instruction 58 | # Define the replacements (old, new) as a dictionary 59 | replacements = { 60 | '_': ' ', 61 | '1f': ' ', 62 | '4f': ' ', 63 | '-': ' ', 64 | '50': ' ', 65 | '55': ' ', 66 | '56': ' ', 67 | # Refine language instruction: 68 | 'object': 'brick and insert it into the slot of the matching shape' 69 | } 70 | instr = step['language_instruction'] 71 | instr = clean_task_instruction(instr, replacements) 72 | step['observation']['natural_language_instruction'] = instr 73 | 74 | return step 75 | 76 | 77 | if __name__ == "__main__": 78 | pass 79 | -------------------------------------------------------------------------------- /data/preprocess_scripts/fractal20220817_data.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, quaternion_to_rotation_matrix,\ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | 8 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 9 | """ 10 | Convert terminate action to a boolean, where True means terminate. 11 | """ 12 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 13 | 14 | 15 | def process_step(step: dict) -> dict: 16 | """ 17 | Unify the action format and clean the task instruction. 18 | 19 | DO NOT use python list, use tf.TensorArray instead. 
20 | """ 21 | # Convert raw action to our action 22 | action = step['action'] 23 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 24 | 25 | eef_delta_pos = action['world_vector'] 26 | eef_ang = action['rotation_delta'] 27 | eef_ang = euler_to_quaternion(eef_ang) 28 | grip_open = 1 - (action['gripper_closedness_action'] + 1) / 2 29 | # Multiplied by 3 Hz to get units m/s and rad/s 30 | base_delta_pos = action['base_displacement_vector'] * 3 31 | base_delta_ang = action['base_displacement_vertical_rotation'] * 3 32 | 33 | # Concatenate the action 34 | arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 35 | action['arm_concat'] = arm_action 36 | base_action = tf.concat([base_delta_pos, base_delta_ang], axis=0) 37 | action['base_concat'] = base_action 38 | 39 | # Write the action format 40 | action['format'] = tf.constant( 41 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open,base_vel_x,base_vel_y,base_angular_vel") 42 | 43 | # Convert raw state to our state 44 | state = step['observation'] 45 | eef_pos = state['base_pose_tool_reached'][:3] 46 | # eef_ang = quaternion_to_euler(state['base_pose_tool_reached'][3:]) 47 | eef_ang = quaternion_to_rotation_matrix(state['base_pose_tool_reached'][3:]) 48 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 49 | grip_open = 1 - state['gripper_closed'] 50 | 51 | # Concatenate the state 52 | state['arm_concat'] = tf.concat([eef_pos, eef_ang, grip_open], axis=0) 53 | 54 | # Write the state format 55 | state['format'] = tf.constant( 56 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 57 | 58 | # Clean the task instruction 59 | # Define the replacements (old, new) as a dictionary 60 | replacements = { 61 | '_': ' ', 62 | '1f': ' ', 63 | '4f': ' ', 64 | '-': ' ', 65 | '50': ' ', 66 | '55': ' ', 67 | '56': ' ', 68 | 69 | } 70 | instr = step['observation']['natural_language_instruction'] 71 | instr = clean_task_instruction(instr, replacements) 72 | step['observation']['natural_language_instruction'] = instr 73 | 74 | return step 75 | 76 | 77 | if __name__ == "__main__": 78 | import tensorflow_datasets as tfds 79 | from data.utils import dataset_to_path 80 | 81 | DATASET_DIR = 'data/datasets/openx_embod' 82 | DATASET_NAME = 'fractal20220817_data' 83 | # Load the dataset 84 | dataset = tfds.builder_from_directory( 85 | builder_dir=dataset_to_path( 86 | DATASET_NAME, DATASET_DIR)) 87 | dataset = dataset.as_dataset(split='all') 88 | 89 | # Inspect the dataset 90 | for episode in dataset: 91 | for step in episode['steps']: 92 | print(step) 93 | -------------------------------------------------------------------------------- /data/preprocess_scripts/iamlab_cmu_pickup_insert_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction,quaternion_to_euler 4 | 5 | def process_step(step: dict) -> dict: 6 | """ 7 | Unify the action format and clean the task instruction. 8 | 9 | DO NOT use python list, use tf.TensorArray instead. 
10 | """ 11 | # Convert raw action to our action 12 | 13 | origin_action = step['action'] 14 | step['action']={} 15 | action=step['action'] 16 | 17 | eef_delta_pos = origin_action[:3] 18 | # delta ZYX euler angles 19 | # eef_ang=quaternion_to_euler(origin_action[3:7]) 20 | eef_ang = origin_action[3:7] 21 | grip_open=origin_action[7:8] 22 | 23 | # No base found 24 | 25 | # Concatenate the action 26 | action['arm_concat'] = tf.concat([eef_delta_pos,eef_ang,grip_open],axis=0) 27 | 28 | # Write the action format 29 | action['format'] = tf.constant( 30 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 31 | 32 | # Convert raw state to our state 33 | state = step['observation'] 34 | # Concatenate the state 35 | # 7x robot joint angles, 1x gripper status, 6x joint torques, 6x end-effector force 36 | arm_joint_ang=state['state'][:7] 37 | 38 | grip_open=state['state'][7:8] 39 | 40 | state['arm_concat'] = tf.concat([arm_joint_ang,grip_open],axis=0) 41 | 42 | # Write the state format 43 | state['format'] = tf.constant( 44 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open") 45 | 46 | # Clean the task instruction 47 | # Define the replacements (old, new) as a dictionary 48 | replacements = { 49 | '_': ' ', 50 | '1f': ' ', 51 | '4f': ' ', 52 | '-': ' ', 53 | '50': ' ', 54 | '55': ' ', 55 | '56': ' ', 56 | 57 | } 58 | instr = step['language_instruction'] 59 | instr = clean_task_instruction(instr, replacements) 60 | step['observation']['natural_language_instruction'] = instr 61 | 62 | return step 63 | 64 | 65 | if __name__ == "__main__": 66 | import tensorflow_datasets as tfds 67 | from data.utils import dataset_to_path 68 | 69 | DATASET_DIR = 'data/datasets/openx_embod' 70 | DATASET_NAME = 'iamlab_cmu_pickup_insert_converted_externally_to_rlds' 71 | # Load the dataset 72 | dataset = tfds.builder_from_directory( 73 | builder_dir=dataset_to_path( 74 | DATASET_NAME, DATASET_DIR)) 75 | dataset = dataset.as_dataset(split='all') 76 | 77 | # Inspect the dataset 78 | for episode in dataset: 79 | for step in episode['steps']: 80 | print(step) 81 | -------------------------------------------------------------------------------- /data/preprocess_scripts/imperialcollege_sawyer_wrist_cam.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction 4 | 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
11 | """ 12 | # Convert raw action to our action 13 | 14 | origin_action = step['action'] 15 | step['action'] = {} 16 | action = step['action'] 17 | 18 | # Multiplied by 10 Hz to get units m/s and rad/s 19 | eef_delta_pos = origin_action[:3] * 10 20 | # delta ZYX euler angles == roll/pitch/yaw velocities 21 | eef_ang = origin_action[3:6] * 10 22 | grip_open = 1 - origin_action[6:7] 23 | 24 | # No base found 25 | 26 | # Concatenate the action 27 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open],axis=0) 28 | 29 | # Write the action format 30 | action['format'] = tf.constant( 31 | "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_open") 32 | 33 | # Convert raw state to our state 34 | # state = step['observation'] 35 | # # Concatenate the state 36 | # grip_open=state['state'] 37 | # state['arm_concat'] = grip_open 38 | 39 | # Write the state format 40 | # state['format'] = tf.constant( 41 | # "gripper_open") 42 | 43 | # Clean the task instruction 44 | # Define the replacements (old, new) as a dictionary 45 | replacements = { 46 | '_': ' ', 47 | '1f': ' ', 48 | '4f': ' ', 49 | '-': ' ', 50 | '50': ' ', 51 | '55': ' ', 52 | '56': ' ', 53 | 54 | } 55 | instr = step['language_instruction'] 56 | instr = clean_task_instruction(instr, replacements) 57 | step['observation']['natural_language_instruction'] = instr 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'imperialcollege_sawyer_wrist_cam' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset: 76 | for step in episode['steps']: 77 | print(step) 78 | -------------------------------------------------------------------------------- /data/preprocess_scripts/jaco_play.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 
19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 23 | eef_delta_pos = action['world_vector'] 24 | # eef_ang = action['rotation_delta'] 25 | # (NOTE) due to the formality problem, grip_open is not used 26 | # grip_open = 1 - (action['gripper_closedness_action'] ) / 2 27 | # base_delta_pos = action['base_displacement_vector'] 28 | # base_delta_ang = action['base_displacement_vertical_rotation'] 29 | 30 | # Concatenate the action 31 | arm_action = eef_delta_pos 32 | action['arm_concat'] = arm_action 33 | # base_action = tf.constant([0, 0, 0, 0], dtype=tf.float32) 34 | # action['base_concat'] = None 35 | 36 | # Write the action format 37 | action['format'] = tf.constant( 38 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z") 39 | 40 | # Convert raw state to our state 41 | state = step['observation'] 42 | joint_pos = state['joint_pos'] 43 | eef_pos = state['end_effector_cartesian_pos'][:3] 44 | eef_quat = state['end_effector_cartesian_pos'][3:] 45 | eef_ang = quaternion_to_rotation_matrix(eef_quat) 46 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 47 | eef_vel = state['end_effector_cartesian_velocity'][:3] 48 | # We do not use angular velocity since it is very inaccurate in this environment 49 | # eef_angular_vel = state['end_effector_cartesian_velocity'][3:] 50 | # Concatenate the state 51 | state['arm_concat'] = tf.concat([joint_pos, eef_pos, eef_ang, eef_vel], axis=0) 52 | 53 | # Write the state format 54 | state['format'] = tf.constant( 55 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_joint_0_pos,gripper_joint_1_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z") 56 | 57 | # Clean the task instruction 58 | # Define the replacements (old, new) as a dictionary 59 | replacements = { 60 | '_': ' ', 61 | '1f': ' ', 62 | '4f': ' ', 63 | '-': ' ', 64 | '50': ' ', 65 | '55': ' ', 66 | '56': ' ', 67 | 68 | } 69 | instr = step['observation']['natural_language_instruction'] 70 | instr = clean_task_instruction(instr, replacements) 71 | step['observation']['natural_language_instruction'] = instr 72 | 73 | return step 74 | 75 | 76 | if __name__ == "__main__": 77 | import tensorflow_datasets as tfds 78 | from data.utils import dataset_to_path 79 | 80 | DATASET_DIR = 'data/datasets/openx_embod' 81 | DATASET_NAME = 'jaco_play' 82 | # Load the dataset 83 | dataset = tfds.builder_from_directory( 84 | builder_dir=dataset_to_path( 85 | DATASET_NAME, DATASET_DIR)) 86 | dataset = dataset.as_dataset(split='all') 87 | 88 | # Inspect the dataset 89 | for episode in dataset: 90 | for step in episode['steps']: 91 | print(step) 92 | -------------------------------------------------------------------------------- /data/preprocess_scripts/language_table.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction 4 | 5 | def process_step(step: dict) -> dict: 6 | """ 7 | Unify the action format and clean the task instruction. 8 | 9 | DO NOT use python list, use tf.TensorArray instead. 
10 | """ 11 | # Convert raw action to our action 12 | 13 | origin_action = step['action'] 14 | step['action']={} 15 | action=step['action'] 16 | 17 | eef_delta_pos=origin_action 18 | # No base found 19 | 20 | # Concatenate the action 21 | action['arm_concat'] = eef_delta_pos 22 | action['terminate'] = step['is_terminal'] 23 | 24 | # Write the action format 25 | action['format'] = tf.constant( 26 | "eef_delta_pos_x,eef_delta_pos_y") 27 | 28 | # Convert raw state to our state 29 | state = step['observation'] 30 | # Concatenate the state 31 | eef_pos=state['effector_translation'] 32 | state['arm_concat'] = eef_pos 33 | # Write the state format 34 | state['format'] = tf.constant( 35 | "eef_pos_x,eef_pos_y") 36 | 37 | # Clean the task instruction 38 | # Define the replacements (old, new) as a dictionary 39 | replacements = { 40 | '_': ' ', 41 | '1f': ' ', 42 | '4f': ' ', 43 | '-': ' ', 44 | '50': ' ', 45 | '55': ' ', 46 | '56': ' ', 47 | 48 | } 49 | instr = step['observation']['instruction'] 50 | # Convert bytes to tf.string 51 | instr = tf.strings.unicode_encode(instr, 'UTF-8') 52 | # Remove '\x00' 53 | instr = tf.strings.regex_replace(instr, '\x00', '') 54 | instr = clean_task_instruction(instr, replacements) 55 | step['observation']['natural_language_instruction'] = instr 56 | return step 57 | 58 | 59 | if __name__ == "__main__": 60 | import tensorflow_datasets as tfds 61 | from data.utils import dataset_to_path 62 | 63 | DATASET_DIR = 'data/datasets/openx_embod' 64 | DATASET_NAME = 'language_table' 65 | # Load the dataset 66 | dataset = tfds.builder_from_directory( 67 | builder_dir=dataset_to_path( 68 | DATASET_NAME, DATASET_DIR)) 69 | dataset = dataset.as_dataset(split='all') 70 | 71 | # Inspect the dataset 72 | for episode in dataset: 73 | for step in episode['steps']: 74 | print(step) 75 | 76 | -------------------------------------------------------------------------------- /data/preprocess_scripts/nyu_door_opening_surprising_effectiveness.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.equal(terminate_act, tf.constant(1.0, dtype=tf.float32)) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 
18 | """ 19 | # Convert raw action to our action 20 | action = step['action'] 21 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 22 | 23 | # Multiplied by 3 Hz to get units m/s and rad/s 24 | eef_delta_pos = action['world_vector'] * 3 25 | eef_ang = action['rotation_delta'] * 3 26 | # Origin: [-0.28, 0.96]: open, close 27 | # 1-Origin: [0.04, 1.28]: close, open 28 | grip_open = 1 - action['gripper_closedness_action'] 29 | # base_delta_pos = action['base_displacement_vector'] 30 | # base_delta_ang = action['base_displacement_vertical_rotation'] 31 | 32 | # Concatenate the action 33 | arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 34 | action['arm_concat'] = arm_action 35 | # base_action = tf.concat([base_delta_pos, base_delta_ang], axis=0) 36 | # action['base_concat'] = base_action 37 | 38 | # Write the action format 39 | action['format'] = tf.constant( 40 | "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_open") 41 | 42 | # Convert raw state to our state 43 | # state = step['observation'] 44 | # eef_pos = state['base_pose_tool_reached'][:3] 45 | # eef_ang = quaternion_to_euler(state['base_pose_tool_reached'][3:]) 46 | # grip_open = 1 - state['gripper_closed'] 47 | 48 | # create empty tensor 49 | # state['arm_concat'] = tf.constant([0, 0, 0, 0, 0, 0], dtype=tf.float32) 50 | 51 | # Write the state format 52 | # state['format'] = tf.constant( 53 | # "") 54 | 55 | # Define the task instruction 56 | step['observation']['natural_language_instruction'] = tf.constant( 57 | "Open the cabinet door.") 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'nyu_door_opening_surprising_effectiveness' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset.take(1): 76 | for step in episode['steps']: 77 | print(step) 78 | -------------------------------------------------------------------------------- /data/preprocess_scripts/nyu_franka_play_dataset_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 11 | 12 | def process_step(step: dict) -> dict: 13 | """ 14 | Unify the action format and clean the task instruction. 15 | 16 | DO NOT use python list, use tf.TensorArray instead. 
17 | """ 18 | # Convert raw action to our action 19 | 20 | origin_action = step['action'] 21 | step['action']={} 22 | action=step['action'] 23 | action['terminate']=terminate_act_to_bool(origin_action[14]) 24 | joint_vel=origin_action[:7] 25 | # gripper_open: 1-open, -1-closed 26 | 27 | grip_open=tf.where(tf.equal(origin_action[13:14],tf.constant(1.0)),tf.constant(1.0),tf.constant(0.0)) 28 | eef_pos=origin_action[7:10] 29 | eef_ang=origin_action[10:13] 30 | eef_ang = euler_to_quaternion(eef_ang) 31 | # No base found 32 | 33 | # Concatenate the action 34 | action['arm_concat'] = tf.concat([joint_vel,grip_open,eef_pos,eef_ang],axis=0) 35 | 36 | # Write the action format 37 | action['format'] = tf.constant( 38 | "arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") 39 | 40 | # Convert raw state to our state 41 | state = step['observation'] 42 | # Concatenate the state 43 | state_arm_joint = state['state'][:7] 44 | eef_pos = state['state'][7:10] 45 | eef_ang = euler_to_rotation_matrix(state['state'][10:13]) 46 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 47 | state['arm_concat'] = tf.concat([state_arm_joint,eef_pos,eef_ang],axis=0) 48 | 49 | # Write the state format 50 | state['format'] = tf.constant( 51 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 52 | 53 | # Clean the task instruction 54 | # Define the replacements (old, new) as a dictionary 55 | replacements = { 56 | '_': ' ', 57 | '1f': ' ', 58 | '4f': ' ', 59 | '-': ' ', 60 | '50': ' ', 61 | '55': ' ', 62 | '56': ' ', 63 | 64 | } 65 | instr = step['language_instruction'] 66 | instr = clean_task_instruction(instr, replacements) 67 | step['observation']['natural_language_instruction'] = instr 68 | 69 | return step 70 | 71 | 72 | if __name__ == "__main__": 73 | import tensorflow_datasets as tfds 74 | from data.utils import dataset_to_path 75 | 76 | DATASET_DIR = 'data/datasets/openx_embod' 77 | DATASET_NAME = 'nyu_franka_play_dataset_converted_externally_to_rlds' 78 | # Load the dataset 79 | dataset = tfds.builder_from_directory( 80 | builder_dir=dataset_to_path( 81 | DATASET_NAME, DATASET_DIR)) 82 | dataset = dataset.as_dataset(split='all') 83 | 84 | # Inspect the dataset 85 | for episode in dataset: 86 | for step in episode['steps']: 87 | print(step) 88 | -------------------------------------------------------------------------------- /data/preprocess_scripts/nyu_rot_dataset_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
11 | """ 12 | # Convert raw action to our action 13 | 14 | origin_action = step['action'] 15 | step['action']={} 16 | action=step['action'] 17 | action['terminate'] = step['is_terminal'] 18 | 19 | eef_delta_pos = origin_action[:3] 20 | eef_ang=origin_action[3:6] 21 | eef_ang = euler_to_quaternion(eef_ang) 22 | # gripper_open: 0-open, 1-closed 23 | grip_open=tf.where(tf.equal(origin_action[6:],tf.constant(0.0)),tf.constant(1.0),tf.constant(0.0)) 24 | 25 | # No base found 26 | 27 | # Concatenate the action 28 | action['arm_concat'] = tf.concat([eef_delta_pos,eef_ang,grip_open],axis=0) 29 | 30 | # Write the action format 31 | action['format'] = tf.constant( 32 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 33 | 34 | # Convert raw state to our state 35 | state = step['observation'] 36 | eef_pos=state['state'][:3] 37 | eef_ang=state['state'][3:6] 38 | eef_ang = euler_to_rotation_matrix(eef_ang) 39 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 40 | grip_open=1-state['state'][6:7] 41 | # Concatenate the state 42 | state['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_open],axis=0) 43 | 44 | # Write the state format 45 | state['format'] = tf.constant( 46 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 47 | 48 | # Clean the task instruction 49 | # Define the replacements (old, new) as a dictionary 50 | replacements = { 51 | '_': ' ', 52 | '1f': ' ', 53 | '4f': ' ', 54 | '-': ' ', 55 | '50': ' ', 56 | '55': ' ', 57 | '56': ' ', 58 | 59 | } 60 | instr = step['language_instruction'] 61 | instr = clean_task_instruction(instr, replacements) 62 | step['observation']['natural_language_instruction'] = instr 63 | 64 | return step 65 | 66 | 67 | if __name__ == "__main__": 68 | import tensorflow_datasets as tfds 69 | from data.utils import dataset_to_path 70 | 71 | DATASET_DIR = 'data/datasets/openx_embod' 72 | DATASET_NAME = 'nyu_rot_dataset_converted_externally_to_rlds' 73 | # Load the dataset 74 | dataset = tfds.builder_from_directory( 75 | builder_dir=dataset_to_path( 76 | DATASET_NAME, DATASET_DIR)) 77 | dataset = dataset.as_dataset(split='all') 78 | 79 | # Inspect the dataset 80 | for episode in dataset: 81 | for step in episode['steps']: 82 | print(step) 83 | -------------------------------------------------------------------------------- /data/preprocess_scripts/qut_dexterous_manpulation.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d 4 | 5 | 6 | def process_step(step: dict) -> dict: 7 | """ 8 | Unify the action format and clean the task instruction. 9 | 10 | DO NOT use python list, use tf.TensorArray instead. 
11 | """ 12 | # Convert raw action to our action 13 | action = step['action'] 14 | eef_pos = action[:3] 15 | eef_quat = action[3:7] 16 | eef_ang = quaternion_to_rotation_matrix(eef_quat) 17 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 18 | grip_pos = action[7:] 19 | 20 | # Concatenate the action 21 | step['action'] = {} 22 | action = step['action'] 23 | action['arm_concat'] = tf.concat([ 24 | grip_pos,eef_pos,eef_ang], axis=0) 25 | # Write the action format 26 | action['format'] = tf.constant( 27 | "gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 28 | action['terminate'] = step['is_terminal'] 29 | 30 | # Convert raw state to our state 31 | state = step['observation'] 32 | robot_state = state['state'] 33 | eef_pos = robot_state[:3] 34 | eef_quat = robot_state[3:7] 35 | eef_ang = quaternion_to_rotation_matrix(eef_quat) 36 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 37 | joint_pos = robot_state[7:14] 38 | grip_pos = robot_state[14:16] * 24.707 # rescale to [0, 1] 39 | joint_vel = robot_state[16:23] 40 | 41 | # Concatenate the state 42 | state['arm_concat'] = tf.concat([ 43 | grip_pos,eef_pos,eef_ang,joint_pos,joint_vel], axis=0) 44 | 45 | # Write the state format 46 | state['format'] = tf.constant( 47 | "gripper_joint_0_pos,gripper_joint_1_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel") 48 | 49 | # Clean the task instruction 50 | # Define the replacements (old, new) as a dictionary 51 | replacements = { 52 | '_': ' ', 53 | '1f': ' ', 54 | '4f': ' ', 55 | '-': ' ', 56 | '50': ' ', 57 | '55': ' ', 58 | '56': ' ', 59 | } 60 | instr = step['language_instruction'] 61 | instr = clean_task_instruction(instr, replacements) 62 | step['observation']['natural_language_instruction'] = instr 63 | 64 | return step 65 | 66 | 67 | if __name__ == "__main__": 68 | pass 69 | -------------------------------------------------------------------------------- /data/preprocess_scripts/robo_net.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 5 | rotation_matrix_to_ortho6d 6 | 7 | 8 | def process_step(step: dict) -> dict: 9 | """ 10 | Unify the action format and clean the task instruction. 11 | 12 | DO NOT use python list, use tf.TensorArray instead. 
13 | """ 14 | # Convert raw action to our action 15 | action = step['action'] 16 | eef_delta_pos = action[:3] 17 | eef_delta_angle_yaw = action[3:4] 18 | eef_ang = tf.stack([0.0, 0.0, eef_delta_angle_yaw[0]], axis=0) 19 | eef_ang = euler_to_quaternion(eef_ang) 20 | eef_gripper_open = (1 - action[4:5]) / 2 21 | 22 | step['action'] = {} 23 | action = step['action'] 24 | action['terminate'] = step['is_terminal'] 25 | 26 | # No base found 27 | 28 | # Concatenate the action 29 | arm_action = tf.concat([eef_delta_pos, eef_ang, eef_gripper_open], axis=0) 30 | action['arm_concat'] = arm_action 31 | 32 | # Write the action format 33 | action['format'] = tf.constant( 34 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 35 | 36 | # Convert raw state to our state 37 | state = step['observation'] 38 | eef_pos = state['state'][:3] 39 | eef_ang_yaw = state['state'][3:4] 40 | eef_ang = tf.stack([0.0, 0.0, eef_ang_yaw[0]], axis=0) 41 | eef_ang = euler_to_rotation_matrix(eef_ang) 42 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 43 | grip_joint_pos = state['state'][4:5] 44 | # If abs(grip_joint_pos) > 3.15, then convert it to the radian 45 | grip_joint_pos = tf.cond(tf.greater(tf.abs(grip_joint_pos), 3.15), 46 | lambda: grip_joint_pos / 180 * np.pi, 47 | lambda: grip_joint_pos) 48 | # Concatenate the state 49 | state['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_joint_pos],axis=0) 50 | 51 | # Write the state format 52 | state['format'] = tf.constant( 53 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 54 | 55 | # Clean the task instruction 56 | # Define the replacements (old, new) as a dictionary 57 | replacements = { 58 | '_': ' ', 59 | '1f': ' ', 60 | '4f': ' ', 61 | '-': ' ', 62 | '50': ' ', 63 | '55': ' ', 64 | '56': ' ', 65 | 66 | } 67 | instr = step['language_instruction'] 68 | instr = clean_task_instruction(instr, replacements) 69 | step['observation']['natural_language_instruction'] = instr 70 | 71 | return step 72 | -------------------------------------------------------------------------------- /data/preprocess_scripts/robomimic_can_ph.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_datasets as tfds 3 | from data.utils import clean_task_instruction, quaternion_to_euler 4 | 5 | def load_dataset(): 6 | builder = tfds.builder('robomimic_ph/can_ph_image') 7 | builder.download_and_prepare() 8 | ds = builder.as_dataset(split='train', shuffle_files=True) 9 | return ds 10 | 11 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 12 | """ 13 | Convert terminate action to a boolean, where True means terminate. 14 | """ 15 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 16 | 17 | def process_step(step: dict) -> dict: 18 | """ 19 | Unify the action format and clean the task instruction. 20 | 21 | DO NOT use python list, use tf.TensorArray instead. 
22 | """ 23 | # format refers to https://www.tensorflow.org/datasets/catalog/robomimic_mg 24 | # Convert raw action to our action 25 | eef = step['action'] 26 | step['action'] = {} 27 | action = step['action'] 28 | action['terminate'] = step['is_terminal'] 29 | 30 | eef_delta_pos = eef[:3] 31 | eef_ang = eef[3:6] 32 | # No base found 33 | 34 | # Concatenate the action 35 | arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0) 36 | action['arm_concat'] = arm_action 37 | 38 | # Write the action format 39 | action['format'] = tf.constant( 40 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_roll,eef_delta_angle_pitch,eef_delta_angle_yaw") 41 | 42 | # Convert raw state to our state 43 | state = step['observation'] 44 | 45 | arm_joint_pos = state['robot0_joint_pos'] 46 | arm_joint_vel = state['robot0_joint_vel'] 47 | gripper_pos = state['robot0_gripper_qpos'] 48 | gripper_vel = state['robot0_gripper_qvel'] 49 | eef_pos = state['robot0_eef_pos'] 50 | eef_ang = quaternion_to_euler(state['robot0_eef_quat']) 51 | 52 | state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, gripper_pos,gripper_vel,eef_pos,eef_ang], axis=0) 53 | # convert to tf32 54 | state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32) 55 | # Write the state format 56 | state['format'] = tf.constant( 57 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,gripper_joint_1_pos,gripper_joint_0_vel,gripper_joint_1_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_roll,eef_angle_pitch,eef_angle_yaw") 58 | 59 | # Clean the task instruction 60 | # Define the replacements (old, new) as a dictionary 61 | replacements = { 62 | '_': ' ', 63 | '1f': ' ', 64 | '4f': ' ', 65 | '-': ' ', 66 | '50': ' ', 67 | '55': ' ', 68 | '56': ' ', 69 | 70 | } 71 | # manual added by lbg 72 | instr = "lift the can into the the box" 73 | instr = clean_task_instruction(instr, replacements) 74 | step['observation']['natural_language_instruction'] = instr 75 | 76 | return step 77 | 78 | 79 | if __name__ == "__main__": 80 | import tensorflow_datasets as tfds 81 | from data.utils import dataset_to_path 82 | 83 | DATASET_DIR = 'data/datasets/openx_embod' 84 | DATASET_NAME = 'roboturk' 85 | # Load the dataset 86 | dataset = tfds.builder_from_directory( 87 | builder_dir=dataset_to_path( 88 | DATASET_NAME, DATASET_DIR)) 89 | dataset = dataset.as_dataset(split='all').take(1) 90 | 91 | # Inspect the dataset 92 | ze=tf.constant(0.0) 93 | for episode in dataset: 94 | for step in episode['steps']: 95 | print(step) 96 | break 97 | -------------------------------------------------------------------------------- /data/preprocess_scripts/robomimic_lift_ph.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_datasets as tfds 3 | from data.utils import clean_task_instruction, quaternion_to_euler 4 | 5 | 6 | def load_dataset(): 7 | builder = tfds.builder('robomimic_ph/lift_ph_image') 8 | builder.download_and_prepare() 9 | ds = builder.as_dataset(split='train', shuffle_files=True) 10 | return ds 11 | 12 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 13 | """ 14 | Convert terminate action to a boolean, where True means terminate. 
15 | """ 16 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 17 | 18 | def process_step(step: dict) -> dict: 19 | """ 20 | Unify the action format and clean the task instruction. 21 | 22 | DO NOT use python list, use tf.TensorArray instead. 23 | """ 24 | # format refers to https://www.tensorflow.org/datasets/catalog/robomimic_mg 25 | # Convert raw action to our action 26 | eef = step['action'] 27 | step['action'] = {} 28 | action = step['action'] 29 | action['terminate'] = step['is_terminal'] 30 | 31 | eef_delta_pos = eef[:3] 32 | eef_ang = quaternion_to_euler(eef[3:]) 33 | 34 | # No base found 35 | 36 | # Concatenate the action 37 | arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0) 38 | action['arm_concat'] = arm_action 39 | 40 | # Write the action format 41 | action['format'] = tf.constant( 42 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_roll,eef_delta_angle_pitch,eef_delta_angle_yaw") 43 | 44 | # Convert raw state to our state 45 | state = step['observation'] 46 | arm_joint_pos = state['robot0_joint_pos'] 47 | arm_joint_vel = state['robot0_joint_vel'] 48 | gripper_pos = state['robot0_gripper_qpos'] 49 | gripper_vel = state['robot0_gripper_qvel'] 50 | eef_pos = state['robot0_eef_pos'] 51 | eef_ang = quaternion_to_euler(state['robot0_eef_quat']) 52 | 53 | state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, gripper_pos,gripper_vel,eef_pos,eef_ang], axis=0) 54 | # convert to tf32 55 | state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32) 56 | # Write the state format 57 | state['format'] = tf.constant( 58 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,gripper_joint_1_pos,gripper_joint_0_vel,gripper_joint_1_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_roll,eef_angle_pitch,eef_angle_yaw") 59 | 60 | # Clean the task instruction 61 | # Define the replacements (old, new) as a dictionary 62 | replacements = { 63 | '_': ' ', 64 | '1f': ' ', 65 | '4f': ' ', 66 | '-': ' ', 67 | '50': ' ', 68 | '55': ' ', 69 | '56': ' ', 70 | 71 | } 72 | # manual added by lbg 73 | instr = "lift the object on the table" 74 | instr = clean_task_instruction(instr, replacements) 75 | step['observation']['natural_language_instruction'] = instr 76 | 77 | return step 78 | 79 | 80 | if __name__ == "__main__": 81 | import tensorflow_datasets as tfds 82 | from data.utils import dataset_to_path 83 | 84 | DATASET_DIR = 'data/datasets/openx_embod' 85 | DATASET_NAME = 'roboturk' 86 | # Load the dataset 87 | dataset = tfds.builder_from_directory( 88 | builder_dir=dataset_to_path( 89 | DATASET_NAME, DATASET_DIR)) 90 | dataset = dataset.as_dataset(split='all').take(1) 91 | 92 | # Inspect the dataset 93 | ze=tf.constant(0.0) 94 | for episode in dataset: 95 | for step in episode['steps']: 96 | print(step) 97 | break 98 | -------------------------------------------------------------------------------- /data/preprocess_scripts/robomimic_square_ph.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_datasets as tfds 3 | from data.utils import clean_task_instruction, quaternion_to_euler 4 | 5 | 6 | def load_dataset(): 7 | builder = tfds.builder('robomimic_ph/square_ph_image') 8 | builder.download_and_prepare() 9 | ds = 
10 |     return ds
11 | 
12 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
13 |     """
14 |     Convert terminate action to a boolean, where True means terminate.
15 |     """
16 |     return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)), tf.constant(False), tf.constant(True))
17 | 
18 | def process_step(step: dict) -> dict:
19 |     """
20 |     Unify the action format and clean the task instruction.
21 | 
22 |     DO NOT use python list, use tf.TensorArray instead.
23 |     """
24 |     # format refers to https://www.tensorflow.org/datasets/catalog/robomimic_mg
25 |     # Convert raw action to our action
26 |     eef = step['action']
27 |     step['action'] = {}
28 |     action = step['action']
29 |     action['terminate'] = step['is_terminal']
30 | 
31 |     eef_delta_pos = eef[:3]
32 |     eef_ang = eef[3:6]  # 3x rotation delta; the trailing gripper channel is not used
33 | 
34 |     # No base found
35 | 
36 |     # Concatenate the action
37 |     arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0)
38 |     action['arm_concat'] = arm_action
39 | 
40 |     # Write the action format
41 |     action['format'] = tf.constant(
42 |         "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_roll,eef_delta_angle_pitch,eef_delta_angle_yaw")
43 | 
44 |     # Convert raw state to our state
45 |     state = step['observation']
46 |     arm_joint_pos = state['robot0_joint_pos']
47 |     arm_joint_vel = state['robot0_joint_vel']
48 |     gripper_pos = state['robot0_gripper_qpos']
49 |     gripper_vel = state['robot0_gripper_qvel']
50 |     eef_pos = state['robot0_eef_pos']
51 |     eef_ang = quaternion_to_euler(state['robot0_eef_quat'])
52 | 
53 |     state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, gripper_pos, gripper_vel, eef_pos, eef_ang], axis=0)
54 |     # Cast the concatenated state to float32
55 |     state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32)
56 |     # Write the state format
57 |     state['format'] = tf.constant(
58 |         "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,gripper_joint_1_pos,gripper_joint_0_vel,gripper_joint_1_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_roll,eef_angle_pitch,eef_angle_yaw")
59 | 
60 |     # Clean the task instruction
61 |     # Define the replacements (old, new) as a dictionary
62 |     replacements = {
63 |         '_': ' ',
64 |         '1f': ' ',
65 |         '4f': ' ',
66 |         '-': ' ',
67 |         '50': ' ',
68 |         '55': ' ',
69 |         '56': ' ',
70 | 
71 |     }
72 |     # Instruction manually added by lbg
73 |     instr = "move the square across the cube"
74 |     instr = clean_task_instruction(instr, replacements)
75 |     step['observation']['natural_language_instruction'] = instr
76 | 
77 |     return step
78 | 
79 | 
80 | if __name__ == "__main__":
81 |     # Load the dataset via the local TFDS builder defined above
82 |     dataset = load_dataset().take(1)
83 | 
84 |     # Inspect the dataset
85 |     for episode in dataset:
86 |         for step in episode['steps']:
87 |             print(step)
88 |         break
89 | 
--------------------------------------------------------------------------------
/data/preprocess_scripts/roboturk.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | from data.utils import clean_task_instruction, 
quaternion_to_euler, euler_to_quaternion 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 18 | """ 19 | # Convert raw action to our action 20 | action = step['action'] 21 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 22 | 23 | eef_delta_pos = action['world_vector'] 24 | eef_ang = action['rotation_delta'] 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | 27 | grip_open = tf.where(action['gripper_closedness_action']<0,tf.constant(1.0),tf.constant(0.0)) 28 | 29 | # No base found 30 | 31 | # Concatenate the action 32 | arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 33 | action['arm_concat'] = arm_action 34 | 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 38 | 39 | # No state found 40 | 41 | # Clean the task instruction 42 | # Define the replacements (old, new) as a dictionary 43 | replacements = { 44 | '_': ' ', 45 | '1f': ' ', 46 | '4f': ' ', 47 | '-': ' ', 48 | '50': ' ', 49 | '55': ' ', 50 | '56': ' ', 51 | 52 | } 53 | instr = step['observation']['natural_language_instruction'] 54 | instr = clean_task_instruction(instr, replacements) 55 | step['observation']['natural_language_instruction'] = instr 56 | 57 | return step 58 | 59 | 60 | if __name__ == "__main__": 61 | import tensorflow_datasets as tfds 62 | from data.utils import dataset_to_path 63 | 64 | DATASET_DIR = 'data/datasets/openx_embod' 65 | DATASET_NAME = 'roboturk' 66 | # Load the dataset 67 | dataset = tfds.builder_from_directory( 68 | builder_dir=dataset_to_path( 69 | DATASET_NAME, DATASET_DIR)) 70 | dataset = dataset.as_dataset(split='all').take(1) 71 | 72 | # Inspect the dataset 73 | ze=tf.constant(0.0) 74 | for episode in dataset: 75 | for step in episode['steps']: 76 | print(step) 77 | break 78 | -------------------------------------------------------------------------------- /data/preprocess_scripts/stanford_kuka_multimodal_dataset_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d 4 | 5 | def process_step(step: dict) -> dict: 6 | """ 7 | Unify the action format and clean the task instruction. 8 | 9 | DO NOT use python list, use tf.TensorArray instead. 
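    Note: only the 3-dim position delta is kept as the action; the raw
    gripper channel (roughly in [-0.15, 0.15]) is ignored, as noted below.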
10 | """ 11 | # Convert raw action to our action 12 | 13 | origin_action = step['action'] 14 | step['action']={} 15 | action=step['action'] 16 | action['terminate'] = step['is_terminal'] 17 | 18 | eef_delta_pos = origin_action[:3] 19 | 20 | # Ignore grip_open: -0.15~0.15 21 | 22 | # No base found 23 | 24 | # Concatenate the action 25 | action['arm_concat'] = eef_delta_pos 26 | 27 | # Write the action format 28 | action['format'] = tf.constant( 29 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z") 30 | 31 | # Convert raw state to our state 32 | state = step['observation'] 33 | eef_quat = state['ee_orientation'] 34 | eef_ang = quaternion_to_rotation_matrix(eef_quat) 35 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 36 | eef_ang_vel = state['ee_orientation_vel'] 37 | eef_pos = state['ee_position'] 38 | eef_vel = state['ee_vel'] 39 | joint_pos = state['joint_pos'] 40 | joint_vel = state['joint_vel'] 41 | gripper_open = state['state'][7:8] 42 | 43 | # Concatenate the state 44 | state['arm_concat'] = tf.concat([joint_pos,gripper_open,joint_vel,eef_pos,eef_ang,eef_vel,eef_ang_vel],axis=0) 45 | 46 | # Write the state format 47 | state['format'] = tf.constant( 48 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw") 49 | 50 | # Clean the task instruction 51 | # Define the replacements (old, new) as a dictionary 52 | replacements = { 53 | '_': ' ', 54 | '1f': ' ', 55 | '4f': ' ', 56 | '-': ' ', 57 | '50': ' ', 58 | '55': ' ', 59 | '56': ' ', 60 | 61 | } 62 | instr = step['language_instruction'] 63 | instr = clean_task_instruction(instr, replacements) 64 | step['observation']['natural_language_instruction'] = instr 65 | 66 | return step 67 | 68 | 69 | if __name__ == "__main__": 70 | import tensorflow_datasets as tfds 71 | from data.utils import dataset_to_path 72 | 73 | DATASET_DIR = 'data/datasets/openx_embod' 74 | DATASET_NAME = 'stanford_kuka_multimodal_dataset_converted_externally_to_rlds' 75 | # Load the dataset 76 | dataset = tfds.builder_from_directory( 77 | builder_dir=dataset_to_path( 78 | DATASET_NAME, DATASET_DIR)) 79 | dataset = dataset.as_dataset(split='all') 80 | 81 | # Inspect the dataset 82 | for episode in dataset: 83 | for step in episode['steps']: 84 | print(step) 85 | -------------------------------------------------------------------------------- /data/preprocess_scripts/stanford_robocook_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 
19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | # Robot action, consists of [3x robot end-effector velocities, 3x robot end-effector angular velocities, 1x gripper velocity]. 23 | eef_delta_pos = action[:3] 24 | eef_ang = action[3:6] 25 | grip_vel = tf.expand_dims(action[6], axis=0) 26 | 27 | # Concatenate the action 28 | # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 29 | step['action'] = {} 30 | action = step['action'] 31 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_vel], axis=0) 32 | action['terminate'] = step['is_terminal'] 33 | 34 | # Write the action format 35 | action['format'] = tf.constant( 36 | "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_joint_0_vel") 37 | 38 | # Convert raw state to our state 39 | state = step['observation'] 40 | state_vec = state['state'] 41 | # Robot state, consists of [3x robot end-effector position, 3x robot end-effector euler angles, 1x gripper position]. 42 | eef_pos = state_vec[:3] 43 | eef_ang = state_vec[3:6] 44 | eef_ang = euler_to_rotation_matrix(eef_ang) 45 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 46 | grip_joint_pos = state_vec[6:7] * 13.03 # rescale to [0, 1] 47 | 48 | # Concatenate the state 49 | state['arm_concat'] = tf.concat([grip_joint_pos,eef_pos, eef_ang], axis=0) 50 | 51 | # Write the state format 52 | state['format'] = tf.constant( 53 | "gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 54 | 55 | # Clean the task instruction 56 | # Define the replacements (old, new) as a dictionary 57 | replacements = { 58 | '_': ' ', 59 | '1f': ' ', 60 | '4f': ' ', 61 | '-': ' ', 62 | '50': ' ', 63 | '55': ' ', 64 | '56': ' ', 65 | 66 | } 67 | instr = step['language_instruction'] 68 | instr = clean_task_instruction(instr, replacements) 69 | step['observation']['natural_language_instruction'] = instr 70 | 71 | return step 72 | 73 | 74 | if __name__ == "__main__": 75 | import tensorflow_datasets as tfds 76 | from data.utils import dataset_to_path 77 | 78 | DATASET_DIR = 'data/datasets/openx_embod' 79 | DATASET_NAME = 'fractal20220817_data' 80 | # Load the dataset 81 | dataset = tfds.builder_from_directory( 82 | builder_dir=dataset_to_path( 83 | DATASET_NAME, DATASET_DIR)) 84 | dataset = dataset.as_dataset(split='all') 85 | 86 | # Inspect the dataset 87 | for episode in dataset: 88 | for step in episode['steps']: 89 | print(step) 90 | -------------------------------------------------------------------------------- /data/preprocess_scripts/tokyo_u_lsmo_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 19 | """ 20 | # Convert raw action to our action 21 | action = step['action'] 22 | # Robot action, consists of [3x endeffector position, 3x euler angles,1x gripper action]. 
23 | eef_delta_pos = action[:3] 24 | eef_ang = action[3:6] 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | grip_open = tf.expand_dims(1 - action[6], axis=0) 27 | 28 | # Concatenate the action 29 | step['action'] = {} 30 | action = step['action'] 31 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 32 | action['terminate'] = step['is_terminal'] 33 | 34 | # Write the action format 35 | action['format'] = tf.constant( 36 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 37 | 38 | # Convert raw state to our state 39 | state = step['observation'] 40 | state_vec = state['state'] 41 | # Robot state, consists of [3x endeffector position, 3x euler angles,6x robot joint angles, 1x gripper position]. 42 | eef_pos = state_vec[:3] 43 | eef_ang = state_vec[3:6] 44 | eef_ang = euler_to_rotation_matrix(eef_ang) 45 | eef_ang = rotation_matrix_to_ortho6d(eef_ang) 46 | arm_joint_ang = state_vec[6:12] 47 | grip_joint_pos = 1 - state_vec[12:13] 48 | 49 | # Concatenate the state 50 | state['arm_concat'] = tf.concat([arm_joint_ang, grip_joint_pos, eef_pos, eef_ang], axis=0) 51 | 52 | # Write the state format 53 | state['format'] = tf.constant( 54 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 55 | 56 | # Clean the task instruction 57 | # Define the replacements (old, new) as a dictionary 58 | replacements = { 59 | '_': ' ', 60 | '1f': ' ', 61 | '4f': ' ', 62 | '-': ' ', 63 | '50': ' ', 64 | '55': ' ', 65 | '56': ' ', 66 | 67 | } 68 | instr = step['language_instruction'] 69 | instr = clean_task_instruction(instr, replacements) 70 | step['observation']['natural_language_instruction'] = instr 71 | 72 | return step 73 | 74 | 75 | if __name__ == "__main__": 76 | import tensorflow_datasets as tfds 77 | from data.utils import dataset_to_path 78 | 79 | DATASET_DIR = 'data/datasets/openx_embod' 80 | DATASET_NAME = 'fractal20220817_data' 81 | # Load the dataset 82 | dataset = tfds.builder_from_directory( 83 | builder_dir=dataset_to_path( 84 | DATASET_NAME, DATASET_DIR)) 85 | dataset = dataset.as_dataset(split='all') 86 | 87 | # Inspect the dataset 88 | for episode in dataset: 89 | for step in episode['steps']: 90 | print(step) 91 | -------------------------------------------------------------------------------- /data/preprocess_scripts/toto.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 
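    Note: the raw open_gripper flag is boolean; it is mapped to
    1.0 (open) / 0.0 (closed) and reshaped to a length-1 tensor before
    being concatenated with the position and quaternion deltas.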
18 | """ 19 | # Convert raw action to our action 20 | action = step['action'] 21 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 22 | eef_delta_pos = action['world_vector'] 23 | eef_ang = action['rotation_delta'] 24 | eef_ang = euler_to_quaternion(eef_ang) 25 | grip_open = tf.reshape(tf.where(action['open_gripper'],tf.constant(1.0),tf.constant(0.0)),(1,)) 26 | 27 | # No base found 28 | 29 | # Concatenate the action 30 | arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 31 | action['arm_concat'] = arm_action 32 | 33 | # Write the action format 34 | action['format'] = tf.constant( 35 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 36 | 37 | # Convert raw state to our state 38 | state = step['observation'] 39 | joint_pos=state['state'] 40 | 41 | # Concatenate the state 42 | state['arm_concat'] = joint_pos 43 | 44 | # Write the state format 45 | state['format'] = tf.constant( 46 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos") 47 | 48 | # Clean the task instruction 49 | # Define the replacements (old, new) as a dictionary 50 | replacements = { 51 | '_': ' ', 52 | '1f': ' ', 53 | '4f': ' ', 54 | '-': ' ', 55 | '50': ' ', 56 | '55': ' ', 57 | '56': ' ', 58 | 59 | } 60 | instr = step['observation']['natural_language_instruction'] 61 | instr = clean_task_instruction(instr, replacements) 62 | step['observation']['natural_language_instruction'] = instr 63 | 64 | return step 65 | 66 | 67 | if __name__ == "__main__": 68 | import tensorflow_datasets as tfds 69 | from data.utils import dataset_to_path 70 | 71 | DATASET_DIR = 'data/datasets/openx_embod' 72 | DATASET_NAME = 'toto' 73 | # Load the dataset 74 | dataset = tfds.builder_from_directory( 75 | builder_dir=dataset_to_path( 76 | DATASET_NAME, DATASET_DIR)) 77 | dataset = dataset.as_dataset(split='all') 78 | 79 | # Inspect the dataset 80 | for episode in dataset: 81 | for step in episode['steps']: 82 | print(step) 83 | -------------------------------------------------------------------------------- /data/preprocess_scripts/ucsd_kitchen_dataset_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 18 | """ 19 | 20 | # Convert raw action to our action 21 | 22 | # 8-dimensional action, consisting of end-effector position and orientation, gripper open/close and a episode termination action. 
23 | action = step['action'] 24 | eef_delta_pos = action[:3] 25 | eef_ang = action[3:6] 26 | eef_ang = euler_to_quaternion(eef_ang) 27 | grip_open = tf.expand_dims(1 - action[6], axis=0) 28 | 29 | # Concatenate the action 30 | # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 31 | step['action'] = {} 32 | action = step['action'] 33 | 34 | action['terminate'] = step['is_terminal'] 35 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 36 | 37 | # Write the action format 38 | action['format'] = tf.constant( 39 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 40 | 41 | # Convert raw state to our state 42 | state = step['observation']['state'] 43 | # 21-dimensional joint states, consists of robot joint angles, joint velocity and joint torque. 44 | arm_joint_pos = state[:7] 45 | arm_joint_vel = state[7:14] 46 | # arm_joint_torque = state[14:21] 47 | 48 | # Concatenate the state 49 | 50 | state = step['observation'] 51 | state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel], axis=0) 52 | 53 | # Write the state format 54 | state['format'] = tf.constant( 55 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel") 56 | 57 | # Clean the task instruction 58 | # Define the replacements (old, new) as a dictionary 59 | replacements = { 60 | '_': ' ', 61 | '1f': ' ', 62 | '4f': ' ', 63 | '-': ' ', 64 | '50': ' ', 65 | '55': ' ', 66 | '56': ' ', 67 | 68 | } 69 | instr = step['language_instruction'] 70 | instr = clean_task_instruction(instr, replacements) 71 | step['observation']['natural_language_instruction'] = instr 72 | 73 | return step 74 | 75 | 76 | if __name__ == "__main__": 77 | import tensorflow_datasets as tfds 78 | from data.utils import dataset_to_path 79 | 80 | DATASET_DIR = 'data/datasets/openx_embod' 81 | DATASET_NAME = 'fractal20220817_data' 82 | # Load the dataset 83 | dataset = tfds.builder_from_directory( 84 | builder_dir=dataset_to_path( 85 | DATASET_NAME, DATASET_DIR)) 86 | dataset = dataset.as_dataset(split='all') 87 | 88 | # Inspect the dataset 89 | for episode in dataset: 90 | for step in episode['steps']: 91 | print(step) 92 | -------------------------------------------------------------------------------- /data/preprocess_scripts/ucsd_pick_and_place_dataset_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 19 | """ 20 | 21 | # Convert raw action to our action 22 | action = step['action'] 23 | # Robot action, consists of [3x gripper velocities,1x gripper open/close torque]. 
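    # The torque channel is mapped to an open signal via 1 - x; further
    # down, raw positions are divided by 10 (dm -> m) and the finger
    # distance by 6.03 to normalize it to roughly [0, 1].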
24 | eef_vel = action[:3] 25 | grip_open = tf.expand_dims(1 - action[3], axis=0) 26 | 27 | # Concatenate the action 28 | # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 29 | step['action'] = {} 30 | action = step['action'] 31 | 32 | action['terminate'] = step['is_terminal'] 33 | action['arm_concat'] = tf.concat([eef_vel,grip_open], axis=0) 34 | # Write the action format 35 | action['format'] = tf.constant( 36 | "eef_vel_x,eef_vel_y,eef_vel_z,gripper_open") 37 | 38 | # Convert raw state to our state 39 | state = step['observation']['state'] 40 | # Robot state, consists of [3x gripper position,3x gripper orientation, 1x finger distance]. 41 | gripper_pos = state[:3]/10 # dm -> m 42 | gripper_ang = state[3:6] 43 | gripper_ang = euler_to_rotation_matrix(gripper_ang) 44 | gripper_ang = rotation_matrix_to_ortho6d(gripper_ang) 45 | gripper_open = state[6:7]/6.03 # 1 for open 46 | 47 | 48 | # Concatenate the state 49 | state = step['observation'] 50 | state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0) 51 | 52 | # Write the state format 53 | state['format'] = tf.constant( 54 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") 55 | 56 | # Clean the task instruction 57 | # Define the replacements (old, new) as a dictionary 58 | replacements = { 59 | '_': ' ', 60 | '1f': ' ', 61 | '4f': ' ', 62 | '-': ' ', 63 | '50': ' ', 64 | '55': ' ', 65 | '56': ' ', 66 | 67 | } 68 | instr = step['language_instruction'] 69 | instr = clean_task_instruction(instr, replacements) 70 | step['observation']['natural_language_instruction'] = instr 71 | 72 | return step 73 | 74 | 75 | if __name__ == "__main__": 76 | import tensorflow_datasets as tfds 77 | from data.utils import dataset_to_path 78 | 79 | DATASET_DIR = 'data/datasets/openx_embod' 80 | DATASET_NAME = 'fractal20220817_data' 81 | # Load the dataset 82 | dataset = tfds.builder_from_directory( 83 | builder_dir=dataset_to_path( 84 | DATASET_NAME, DATASET_DIR)) 85 | dataset = dataset.as_dataset(split='all') 86 | 87 | # Inspect the dataset 88 | for episode in dataset: 89 | for step in episode['steps']: 90 | print(step) 91 | -------------------------------------------------------------------------------- /data/preprocess_scripts/uiuc_d3field.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, rotation_matrix_to_ortho6d 4 | 5 | def process_step(step: dict) -> dict: 6 | """ 7 | Unify the action format and clean the task instruction. 8 | 9 | DO NOT use python list, use tf.TensorArray instead. 
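    Note: the raw state is a 4x4 homogeneous end-effector transform; the
    position is read from its last column and the 3x3 rotation block is
    compressed to the 6D representation.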
10 | """ 11 | # Convert raw action to our action 12 | 13 | origin_action = step['action'] 14 | step['action']={} 15 | action=step['action'] 16 | action['terminate'] = step['is_terminal'] 17 | # Robot displacement from last frame 18 | eef_delta_pos = origin_action[:3] 19 | 20 | # No base found 21 | 22 | # Concatenate the action 23 | action['arm_concat'] = eef_delta_pos 24 | 25 | # Write the action format 26 | action['format'] = tf.constant( 27 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z") 28 | 29 | # Convert raw state to our state 30 | state = step['observation'] 31 | # Concatenate the state 32 | # 4x4 Robot end-effector state 33 | eef_mat=state['state'] 34 | eef_pos=eef_mat[:3,3] 35 | rotation_matrix=eef_mat[:3,:3] 36 | eef_ang = rotation_matrix_to_ortho6d(rotation_matrix) 37 | state['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) 38 | 39 | # Write the state format 40 | state['format'] = tf.constant( 41 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 42 | 43 | # Clean the task instruction 44 | # Define the replacements (old, new) as a dictionary 45 | replacements = { 46 | '_': ' ', 47 | '1f': ' ', 48 | '4f': ' ', 49 | '-': ' ', 50 | '50': ' ', 51 | '55': ' ', 52 | '56': ' ', 53 | 54 | } 55 | instr = step['language_instruction'] 56 | instr = clean_task_instruction(instr, replacements) 57 | step['observation']['natural_language_instruction'] = instr 58 | 59 | return step 60 | 61 | 62 | if __name__ == "__main__": 63 | import tensorflow_datasets as tfds 64 | from data.utils import dataset_to_path 65 | 66 | DATASET_DIR = 'data/datasets/openx_embod' 67 | DATASET_NAME = 'uiuc_d3field' 68 | # Load the dataset 69 | dataset = tfds.builder_from_directory( 70 | builder_dir=dataset_to_path( 71 | DATASET_NAME, DATASET_DIR)) 72 | dataset = dataset.as_dataset(split='all') 73 | 74 | # Inspect the dataset 75 | for episode in dataset: 76 | for step in episode['steps']: 77 | print(step) 78 | -------------------------------------------------------------------------------- /data/preprocess_scripts/usc_cloth_sim_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 18 | """ 19 | 20 | # Convert raw action to our action 21 | # Robot action, consists of x,y,z goal and picker commandpicker<0.5 = open, picker>0.5 = close. 
22 |     action = step['action']
23 |     eef_delta_pos = action[:3]
24 |     grip_open = tf.cast(tf.expand_dims(action[3] < 0.5, axis=0), dtype=tf.float32)
25 | 
26 |     # Concatenate the action
27 |     step['action'] = {}
28 |     action = step['action']
29 | 
30 |     action['terminate'] = step['is_terminal']
31 |     action['arm_concat'] = tf.concat([eef_delta_pos, grip_open], axis=0)
32 | 
33 |     # Write the action format
34 |     action['format'] = tf.constant(
35 |         "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,gripper_open")
36 | 
37 |     # State doesn't exist in this dataset
38 | 
39 |     # Clean the task instruction
40 |     # Define the replacements (old, new) as a dictionary
41 |     replacements = {
42 |         '_': ' ',
43 |         '1f': ' ',
44 |         '4f': ' ',
45 |         '-': ' ',
46 |         '50': ' ',
47 |         '55': ' ',
48 |         '56': ' ',
49 | 
50 |     }
51 |     instr = step['language_instruction']
52 |     instr = clean_task_instruction(instr, replacements)
53 |     step['observation']['natural_language_instruction'] = instr
54 | 
55 |     return step
56 | 
57 | 
58 | if __name__ == "__main__":
59 |     import tensorflow_datasets as tfds
60 |     from data.utils import dataset_to_path
61 | 
62 |     DATASET_DIR = 'data/datasets/openx_embod'
63 |     DATASET_NAME = 'usc_cloth_sim_converted_externally_to_rlds'
64 |     # Load the dataset
65 |     dataset = tfds.builder_from_directory(
66 |         builder_dir=dataset_to_path(
67 |             DATASET_NAME, DATASET_DIR))
68 |     dataset = dataset.as_dataset(split='all')
69 | 
70 |     # Inspect the dataset
71 |     for episode in dataset:
72 |         for step in episode['steps']:
73 |             print(step)
74 | 
--------------------------------------------------------------------------------
/data/preprocess_scripts/utaustin_mutex.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | from data.utils import clean_task_instruction, euler_to_quaternion, rotation_matrix_to_ortho6d
4 | 
5 | def process_step(step: dict) -> dict:
6 |     """
7 |     Unify the action format and clean the task instruction.
8 | 
9 |     DO NOT use python list, use tf.TensorArray instead.
10 | """ 11 | # Convert raw action to our action 12 | 13 | origin_action = step['action'] 14 | step['action']={} 15 | action=step['action'] 16 | action['terminate'] = step['is_terminal'] 17 | # 6x end effector delta pose, 1x gripper position 18 | eef_delta_pos = origin_action[:3] 19 | eef_ang=origin_action[3:6] 20 | eef_ang = euler_to_quaternion(eef_ang) 21 | grip_open=origin_action[6:7] 22 | # No base found 23 | 24 | # Concatenate the action 25 | action['arm_concat'] = tf.concat([eef_delta_pos,eef_ang,grip_open],axis=0) 26 | 27 | # Write the action format 28 | action['format'] = tf.constant( 29 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 30 | 31 | # Convert raw state to our state 32 | state = step['observation'] 33 | # Concatenate the state 34 | arm_joint_ang=state['state'][:7] 35 | grip_open=state['state'][7:8] * 13.21 # rescale to [0, 1] 36 | # 4x4 Robot end-effector state 37 | eef_mat = tf.transpose(tf.reshape(state['state'][8:], (4, 4))) 38 | eef_pos = eef_mat[:3, 3] 39 | rotation_matrix = eef_mat[:3, :3] 40 | eef_ang = rotation_matrix_to_ortho6d(rotation_matrix) 41 | state['arm_concat'] = tf.concat([arm_joint_ang,grip_open,eef_pos,eef_ang],axis=0) 42 | 43 | # Write the state format 44 | state['format'] = tf.constant( 45 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 46 | 47 | # Clean the task instruction 48 | # Define the replacements (old, new) as a dictionary 49 | replacements = { 50 | '_': ' ', 51 | '1f': ' ', 52 | '4f': ' ', 53 | '-': ' ', 54 | '50': ' ', 55 | '55': ' ', 56 | '56': ' ', 57 | 58 | } 59 | instr = step['language_instruction'] 60 | instr = clean_task_instruction(instr, replacements) 61 | step['observation']['natural_language_instruction'] = instr 62 | 63 | return step 64 | 65 | 66 | if __name__ == "__main__": 67 | import tensorflow_datasets as tfds 68 | from data.utils import dataset_to_path 69 | 70 | DATASET_DIR = 'data/datasets/openx_embod' 71 | DATASET_NAME = 'utaustin_mutex' 72 | # Load the dataset 73 | dataset = tfds.builder_from_directory( 74 | builder_dir=dataset_to_path( 75 | DATASET_NAME, DATASET_DIR)) 76 | dataset = dataset.as_dataset(split='all') 77 | 78 | # Inspect the dataset 79 | for episode in dataset: 80 | for step in episode['steps']: 81 | print(step) 82 | -------------------------------------------------------------------------------- /data/preprocess_scripts/utokyo_pr2_opening_fridge_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 
19 | """ 20 | 21 | # Convert raw action to our action 22 | action = step['action'] 23 | # Robot action, consists of [3x end effector pos, 3x robot rpy angles, 1x gripper open/close command, 1x terminal action]. 24 | eef_delta_pos = action[:3]/1000 # change from mm to m 25 | eef_ang = action[3:6] 26 | eef_ang = euler_to_quaternion(eef_ang) 27 | grip_open = tf.expand_dims(1 - action[6], axis=0) 28 | 29 | # Concatenate the action 30 | step['action'] = {} 31 | action = step['action'] 32 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 33 | 34 | action['terminate'] = step['is_terminal'] 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 38 | 39 | 40 | # Convert raw state to our state 41 | state = step['observation']['state'] 42 | # Robot state, consists of [3x end effector pos, 3x robot rpy angles, 1x gripper position]. 43 | gripper_pos = state[:3]/1000 # change from mm to m 44 | gripper_ang = state[3:6] 45 | gripper_ang = euler_to_rotation_matrix(gripper_ang) 46 | gripper_ang = rotation_matrix_to_ortho6d(gripper_ang) 47 | gripper_open = state[6:7]/1000 * 11.54 # rescale to [0, 1] 48 | 49 | 50 | # Concatenate the state 51 | state = step['observation'] 52 | state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0) 53 | 54 | # Write the state format 55 | state['format'] = tf.constant( 56 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_joint_0_pos") 57 | 58 | # Clean the task instruction 59 | # Define the replacements (old, new) as a dictionary 60 | replacements = { 61 | '_': ' ', 62 | '1f': ' ', 63 | '4f': ' ', 64 | '-': ' ', 65 | '50': ' ', 66 | '55': ' ', 67 | '56': ' ', 68 | 69 | } 70 | instr = step['language_instruction'] 71 | instr = clean_task_instruction(instr, replacements) 72 | step['observation']['natural_language_instruction'] = instr 73 | 74 | return step 75 | 76 | 77 | if __name__ == "__main__": 78 | import tensorflow_datasets as tfds 79 | from data.utils import dataset_to_path 80 | 81 | DATASET_DIR = 'data/datasets/openx_embod' 82 | DATASET_NAME = 'fractal20220817_data' 83 | # Load the dataset 84 | dataset = tfds.builder_from_directory( 85 | builder_dir=dataset_to_path( 86 | DATASET_NAME, DATASET_DIR)) 87 | dataset = dataset.as_dataset(split='all') 88 | 89 | # Inspect the dataset 90 | for episode in dataset: 91 | for step in episode['steps']: 92 | print(step) 93 | -------------------------------------------------------------------------------- /data/preprocess_scripts/utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ 4 | rotation_matrix_to_ortho6d 5 | 6 | 7 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 8 | """ 9 | Convert terminate action to a boolean, where True means terminate. 10 | """ 11 | return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) 12 | 13 | 14 | def process_step(step: dict) -> dict: 15 | """ 16 | Unify the action format and clean the task instruction. 17 | 18 | DO NOT use python list, use tf.TensorArray instead. 
19 | """ 20 | 21 | # Convert raw action to our action 22 | action = step['action'] 23 | # Robot action, consists of [3x end effector pos, 3x robot rpy angles, 1x gripper open/close command, 1x terminal action]. 24 | eef_delta_pos = action[:3]/1000 # change from mm to m 25 | eef_ang = action[3:6] 26 | eef_ang = euler_to_quaternion(eef_ang) 27 | grip_open = tf.expand_dims(1 - action[6], axis=0) 28 | 29 | # Concatenate the action 30 | step['action'] = {} 31 | action = step['action'] 32 | action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 33 | 34 | action['terminate'] = step['is_terminal'] 35 | # Write the action format 36 | action['format'] = tf.constant( 37 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 38 | 39 | 40 | # Convert raw state to our state 41 | state = step['observation']['state'] 42 | # Robot state, consists of [3x end effector pos, 3x robot rpy angles, 1x gripper position]. 43 | gripper_pos = state[:3]/1000 # change from mm to m 44 | gripper_ang = state[3:6] 45 | gripper_ang = euler_to_rotation_matrix(gripper_ang) 46 | gripper_ang = rotation_matrix_to_ortho6d(gripper_ang) 47 | gripper_open = state[6:7] / 86.12126922607422 # rescale to [0, 1] 48 | 49 | # Concatenate the state 50 | state = step['observation'] 51 | state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0) 52 | 53 | # Write the state format 54 | state['format'] = tf.constant( 55 | "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_joint_0_pos") 56 | 57 | # Clean the task instruction 58 | # Define the replacements (old, new) as a dictionary 59 | replacements = { 60 | '_': ' ', 61 | '1f': ' ', 62 | '4f': ' ', 63 | '-': ' ', 64 | '50': ' ', 65 | '55': ' ', 66 | '56': ' ', 67 | 68 | } 69 | instr = step['language_instruction'] 70 | instr = clean_task_instruction(instr, replacements) 71 | step['observation']['natural_language_instruction'] = instr 72 | 73 | return step 74 | 75 | 76 | if __name__ == "__main__": 77 | import tensorflow_datasets as tfds 78 | from data.utils import dataset_to_path 79 | 80 | DATASET_DIR = 'data/datasets/openx_embod' 81 | DATASET_NAME = 'fractal20220817_data' 82 | # Load the dataset 83 | dataset = tfds.builder_from_directory( 84 | builder_dir=dataset_to_path( 85 | DATASET_NAME, DATASET_DIR)) 86 | dataset = dataset.as_dataset(split='all') 87 | 88 | # Inspect the dataset 89 | for episode in dataset: 90 | for step in episode['steps']: 91 | print(step) 92 | -------------------------------------------------------------------------------- /data/preprocess_scripts/viola.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from data.utils import clean_task_instruction, euler_to_quaternion, rotation_matrix_to_ortho6d 4 | 5 | 6 | def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: 7 | """ 8 | Convert terminate action to a boolean, where True means terminate. 9 | """ 10 | return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) 11 | 12 | 13 | def process_step(step: dict) -> dict: 14 | """ 15 | Unify the action format and clean the task instruction. 16 | 17 | DO NOT use python list, use tf.TensorArray instead. 
18 | """ 19 | # Convert raw action to our action 20 | action = step['action'] 21 | action['terminate'] = terminate_act_to_bool(action['terminate_episode']) 22 | 23 | eef_delta_pos = action['world_vector'] 24 | eef_ang = action['rotation_delta'] 25 | eef_ang = euler_to_quaternion(eef_ang) 26 | grip_open = tf.reshape(tf.where(action['gripper_closedness_action']<0,tf.constant(1.0),tf.constant(0.0)),(1,)) 27 | 28 | # No base found 29 | 30 | # Concatenate the action 31 | arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) 32 | action['arm_concat'] = arm_action 33 | 34 | # Write the action format 35 | action['format'] = tf.constant( 36 | "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") 37 | 38 | # Convert raw state to our state 39 | state = step['observation'] 40 | joint_pos=state['joint_states'] 41 | grip_open=state['gripper_states'] * 12.905 # rescale to [0, 1] 42 | state_ee=state['ee_states'] 43 | transform_matrix = tf.transpose(tf.reshape(state_ee, (4, 4))) 44 | eef_pos = transform_matrix[:3, 3] 45 | rotation_matrix = transform_matrix[:3, :3] 46 | eef_ang = rotation_matrix_to_ortho6d(rotation_matrix) 47 | 48 | # Concatenate the state 49 | state['arm_concat'] = tf.concat([joint_pos,grip_open,eef_pos,eef_ang],axis=0) 50 | 51 | # Write the state format 52 | state['format'] = tf.constant( 53 | "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") 54 | 55 | # Clean the task instruction 56 | # Define the replacements (old, new) as a dictionary 57 | replacements = { 58 | '_': ' ', 59 | '1f': ' ', 60 | '4f': ' ', 61 | '-': ' ', 62 | '50': ' ', 63 | '55': ' ', 64 | '56': ' ', 65 | 66 | } 67 | instr = step['observation']['natural_language_instruction'] 68 | instr = clean_task_instruction(instr, replacements) 69 | step['observation']['natural_language_instruction'] = instr 70 | 71 | return step 72 | 73 | 74 | if __name__ == "__main__": 75 | import tensorflow_datasets as tfds 76 | from data.utils import dataset_to_path 77 | 78 | DATASET_DIR = 'data/datasets/openx_embod' 79 | DATASET_NAME = 'viola' 80 | # Load the dataset 81 | dataset = tfds.builder_from_directory( 82 | builder_dir=dataset_to_path( 83 | DATASET_NAME, DATASET_DIR)) 84 | dataset = dataset.as_dataset(split='all') 85 | 86 | # Inspect the dataset 87 | for episode in dataset: 88 | for step in episode['steps']: 89 | print(step) 90 | -------------------------------------------------------------------------------- /data/roboset/download.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import os 3 | from tqdm import tqdm 4 | 5 | links = [] 6 | with open('links.txt', 'r', encoding='utf-8') as file: 7 | for line in file: 8 | links.append(line.strip()) 9 | 10 | download_dir = "../datasets/roboset" 11 | os.makedirs(download_dir, exist_ok=True) 12 | 13 | for link in links: 14 | filename = os.path.basename(link) 15 | filepath = os.path.join(download_dir, filename) 16 | print(f"Downloading {filename} from {link}") 17 | 18 | response = requests.get(link, stream=True) 19 | total_size_in_bytes = int(response.headers.get('content-length', 0)) 20 | block_size = 1024 21 | 22 | if os.path.exists(filepath): 23 | local_size = os.path.getsize(filepath) 24 | if local_size == total_size_in_bytes: 25 | print(f"{filename} already 
exists") 26 | continue 27 | 28 | progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) 29 | 30 | with open(filepath, 'wb') as f: 31 | for data in response.iter_content(block_size): 32 | progress_bar.update(len(data)) 33 | f.write(data) 34 | 35 | progress_bar.close() 36 | 37 | if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes: 38 | print("ERROR, something went wrong") 39 | 40 | print(f"Downloaded {filename}") 41 | 42 | print("All files processed.") 43 | -------------------------------------------------------------------------------- /data/roboset/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | while true; do 4 | python download.py 5 | EXIT_CODE=$? 6 | if [ $EXIT_CODE -ne 0 ]; then 7 | echo "Download exited with code $EXIT_CODE. Restarting..." 8 | else 9 | echo "Download exited with code 0. Not restarting." 10 | break 11 | fi 12 | done 13 | 14 | # Unzip all the files in the ../datasets/roboset/ directory 15 | cd ../datasets/roboset/ 16 | for file in *.tar.gz; do 17 | tar -xzvf "$file" 18 | done 19 | 20 | ## Convert the dataset to tfrecords 21 | python hdf5totfrecords.py -------------------------------------------------------------------------------- /docs/test_6drot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.spatial.transform import Rotation as R 3 | 4 | 5 | def convert_quaternion_to_euler(quat): 6 | """ 7 | Convert Quarternion (xyzw) to Euler angles (rpy) 8 | """ 9 | # Normalize 10 | quat = quat / np.linalg.norm(quat) 11 | euler = R.from_quat(quat).as_euler('xyz') 12 | 13 | return euler 14 | 15 | 16 | def convert_euler_to_quaternion(euler): 17 | """ 18 | Convert Euler angles (rpy) to Quarternion (xyzw) 19 | """ 20 | quat = R.from_euler('xyz', euler).as_quat() 21 | 22 | return quat 23 | 24 | 25 | def convert_euler_to_rotation_matrix(euler): 26 | """ 27 | Convert Euler angles (rpy) to rotation matrix (3x3). 28 | """ 29 | quat = R.from_euler('xyz', euler).as_matrix() 30 | 31 | return quat 32 | 33 | 34 | def convert_rotation_matrix_to_euler(rotmat): 35 | """ 36 | Convert rotation matrix (3x3) to Euler angles (rpy). 
37 | """ 38 | r = R.from_matrix(rotmat) 39 | euler = r.as_euler('xyz', degrees=False) 40 | 41 | return euler 42 | 43 | 44 | def normalize_vector(v): 45 | v_mag = np.linalg.norm(v, axis=-1, keepdims=True) 46 | v_mag = np.maximum(v_mag, 1e-8) 47 | return v / v_mag 48 | 49 | 50 | def cross_product(u, v): 51 | i = u[:,1]*v[:,2] - u[:,2]*v[:,1] 52 | j = u[:,2]*v[:,0] - u[:,0]*v[:,2] 53 | k = u[:,0]*v[:,1] - u[:,1]*v[:,0] 54 | 55 | out = np.stack((i, j, k), axis=1) 56 | return out 57 | 58 | 59 | def compute_rotation_matrix_from_ortho6d(ortho6d): 60 | x_raw = ortho6d[:, 0:3] 61 | y_raw = ortho6d[:, 3:6] 62 | 63 | x = normalize_vector(x_raw) 64 | z = cross_product(x, y_raw) 65 | z = normalize_vector(z) 66 | y = cross_product(z, x) 67 | 68 | x = x.reshape(-1, 3, 1) 69 | y = y.reshape(-1, 3, 1) 70 | z = z.reshape(-1, 3, 1) 71 | matrix = np.concatenate((x, y, z), axis=2) 72 | return matrix 73 | 74 | 75 | def compute_ortho6d_from_rotation_matrix(matrix): 76 | # The ortho6d represents the first two column vectors a1 and a2 of the 77 | # rotation matrix: [ | , |, | ] 78 | # [ a1, a2, a3] 79 | # [ | , |, | ] 80 | ortho6d = matrix[:, :, :2].transpose(0, 2, 1).reshape(matrix.shape[0], -1) 81 | return ortho6d 82 | 83 | 84 | # Test 85 | if __name__ == "__main__": 86 | # Randomly generate a euler ange 87 | euler = np.random.rand(3) * 2 * np.pi - np.pi 88 | euler = euler[None, :] # Add batch dimension 89 | print(f"Input Euler angles: {euler}") 90 | 91 | # Convert to 6D Rotation 92 | rotmat = convert_euler_to_rotation_matrix(euler) 93 | ortho6d = compute_ortho6d_from_rotation_matrix(rotmat) 94 | print(f"6D Rotation: {ortho6d}") 95 | 96 | # Convert back to Euler angles 97 | rotmat_recovered = compute_rotation_matrix_from_ortho6d(ortho6d) 98 | euler_recovered = convert_rotation_matrix_to_euler(rotmat_recovered) 99 | print(f"Recovered Euler angles: {euler_recovered}") 100 | -------------------------------------------------------------------------------- /finetune.sh: -------------------------------------------------------------------------------- 1 | export NCCL_IB_HCA=mlx5_0:1,mlx5_1:1,mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_7:1,mlx5_8:1,mlx5_9:1 2 | export NCCL_IB_DISABLE=0 3 | export NCCL_SOCKET_IFNAME=bond0 4 | export NCCL_DEBUG=INFO 5 | export NCCL_NVLS_ENABLE=0 6 | 7 | export TEXT_ENCODER_NAME="google/t5-v1_1-xxl" 8 | export VISION_ENCODER_NAME="google/siglip-so400m-patch14-384" 9 | export OUTPUT_DIR="./checkpoints/rdt-finetune-1b" 10 | export CFLAGS="-I/usr/include" 11 | export LDFLAGS="-L/usr/lib/x86_64-linux-gnu" 12 | export CUTLASS_PATH="/path/to/cutlass" 13 | 14 | export WANDB_PROJECT="robotics_diffusion_transformer" 15 | 16 | if [ ! -d "$OUTPUT_DIR" ]; then 17 | mkdir "$OUTPUT_DIR" 18 | echo "Folder '$OUTPUT_DIR' created" 19 | else 20 | echo "Folder '$OUTPUT_DIR' already exists" 21 | fi 22 | 23 | # For run in a single node/machine 24 | # accelerate launch main.py \ 25 | # --deepspeed="./configs/zero2.json" \ 26 | # ... 
27 | 
28 | deepspeed --hostfile=hostfile.txt main.py \
29 |     --deepspeed="./configs/zero2.json" \
30 |     --pretrained_model_name_or_path="robotics-diffusion-transformer/rdt-1b" \
31 |     --pretrained_text_encoder_name_or_path=$TEXT_ENCODER_NAME \
32 |     --pretrained_vision_encoder_name_or_path=$VISION_ENCODER_NAME \
33 |     --output_dir=$OUTPUT_DIR \
34 |     --train_batch_size=32 \
35 |     --sample_batch_size=64 \
36 |     --max_train_steps=200000 \
37 |     --checkpointing_period=1000 \
38 |     --sample_period=500 \
39 |     --checkpoints_total_limit=40 \
40 |     --lr_scheduler="constant" \
41 |     --learning_rate=1e-4 \
42 |     --mixed_precision="bf16" \
43 |     --dataloader_num_workers=8 \
44 |     --image_aug \
45 |     --dataset_type="finetune" \
46 |     --state_noise_snr=40 \
47 |     --load_from_hdf5 \
48 |     --report_to=wandb
49 | 
50 | # Use this to resume training from some previous checkpoint
51 | # --resume_from_checkpoint="checkpoint-36000" \
52 | # Use this to load from saved language instruction embeddings,
53 | # instead of calculating them during training
54 | # --precomp_lang_embed \
55 | 
--------------------------------------------------------------------------------
/finetune_maniskill.sh:
--------------------------------------------------------------------------------
1 | export NCCL_IB_HCA=mlx5_0:1,mlx5_1:1,mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_7:1,mlx5_8:1,mlx5_9:1
2 | export NCCL_IB_DISABLE=0
3 | export NCCL_SOCKET_IFNAME=bond0
4 | export NCCL_DEBUG=INFO
5 | export NCCL_NVLS_ENABLE=0
6 | 
7 | export TEXT_ENCODER_NAME="google/t5-v1_1-xxl"
8 | export VISION_ENCODER_NAME="google/siglip-so400m-patch14-384"
9 | export OUTPUT_DIR="./checkpoints/rdt-finetune-1b-sim"
10 | export CFLAGS="-I/usr/include"
11 | export LDFLAGS="-L/usr/lib/x86_64-linux-gnu"
12 | export CUTLASS_PATH="/data/lingxuan/cutlass"
13 | 
14 | export WANDB_PROJECT="robotic_diffusion_transformer"
15 | 
16 | if [ ! -d "$OUTPUT_DIR" ]; then
17 |     mkdir "$OUTPUT_DIR"
18 |     echo "Folder '$OUTPUT_DIR' created"
19 | else
20 |     echo "Folder '$OUTPUT_DIR' already exists"
21 | fi
22 | # To run on a single node/machine
23 | # accelerate launch main.py \
24 | #     --deepspeed="./configs/zero2.json" \
25 | #     ...
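   | # (This script already uses accelerate launch below; see finetune.sh for the
   | # multi-node deepspeed --hostfile variant.)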
26 | 
27 | accelerate launch main.py \
28 |     --deepspeed="./configs/zero2.json" \
29 |     --pretrained_model_name_or_path="robotics-diffusion-transformer/rdt-1b" \
30 |     --pretrained_text_encoder_name_or_path=$TEXT_ENCODER_NAME \
31 |     --pretrained_vision_encoder_name_or_path=$VISION_ENCODER_NAME \
32 |     --output_dir=$OUTPUT_DIR \
33 |     --train_batch_size=24 \
34 |     --sample_batch_size=32 \
35 |     --max_train_steps=400000 \
36 |     --checkpointing_period=10000 \
37 |     --sample_period=500 \
38 |     --checkpoints_total_limit=40 \
39 |     --lr_scheduler="constant" \
40 |     --learning_rate=1e-4 \
41 |     --mixed_precision="bf16" \
42 |     --dataloader_num_workers=8 \
43 |     --image_aug \
44 |     --dataset_type="finetune" \
45 |     --state_noise_snr=40 \
46 |     --load_from_hdf5 \
47 |     --report_to=wandb
48 | 
49 | 
--------------------------------------------------------------------------------
/inference.sh:
--------------------------------------------------------------------------------
1 | # Set --pretrained_model_name_or_path to your finetuned checkpoint, e.g.,
2 | # checkpoints/rdt-finetune-1b/checkpoint-<STEP> or
3 | # checkpoints/rdt-finetune-1b/checkpoint-<STEP>/pytorch_model/mp_rank_00_model_states.pt
4 | # Set --ctrl_freq to your control frequency
5 | python -m scripts.agilex_inference \
6 |     --use_actions_interpolation \
7 |     --pretrained_model_name_or_path="checkpoints/your_finetuned_ckpt.pt" \
8 |     --lang_embeddings_path="outs/lang_embeddings/your_instr.pt" \
9 |     --ctrl_freq=25
10 | 
--------------------------------------------------------------------------------
/models/ema_model.py:
--------------------------------------------------------------------------------
1 | # Reference: DiffusionPolicy [https://github.com/real-stanford/diffusion_policy]
2 | 
3 | import torch
4 | from torch.nn.modules.batchnorm import _BatchNorm
5 | 
6 | 
7 | class EMAModel:
8 |     """
9 |     Exponential Moving Average of model weights
10 |     """
11 |     def __init__(
12 |         self,
13 |         model,
14 |         update_after_step=0,
15 |         inv_gamma=1.0,
16 |         power=2 / 3,
17 |         min_value=0.0,
18 |         max_value=0.9999
19 |     ):
20 |         """
21 |         @crowsonkb's notes on EMA Warmup:
22 |             If gamma=1 and power=1, implements a simple average. gamma=1, power=2/3 are good values for models you plan
23 |             to train for a million or more steps (reaches decay factor 0.999 at 31.6K steps, 0.9999 at 1M steps),
24 |             gamma=1, power=3/4 for models you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999
25 |             at 215.4k steps).
26 |         Args:
27 |             inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1.
28 |             power (float): Exponential factor of EMA warmup. Default: 2/3.
29 |             min_value (float): The minimum EMA decay rate. Default: 0.
30 |         """
31 | 
32 |         self.averaged_model = model
33 |         self.averaged_model.eval()
34 |         self.averaged_model.requires_grad_(False)
35 | 
36 |         self.update_after_step = update_after_step
37 |         self.inv_gamma = inv_gamma
38 |         self.power = power
39 |         self.min_value = min_value
40 |         self.max_value = max_value
41 | 
42 |         self.decay = 0.0
43 |         self.optimization_step = 0
44 | 
45 |     def get_decay(self, optimization_step):
46 |         """
47 |         Compute the decay factor for the exponential moving average.
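   |         Implements: decay = 1 - (1 + step / inv_gamma) ** (-power),
   |         clipped to [min_value, max_value]; returns 0.0 until update_after_step is passed.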
48 | """ 49 | step = max(0, optimization_step - self.update_after_step - 1) 50 | value = 1 - (1 + step / self.inv_gamma) ** -self.power 51 | 52 | if step <= 0: 53 | return 0.0 54 | 55 | return max(self.min_value, min(value, self.max_value)) 56 | 57 | @torch.no_grad() 58 | def step(self, new_model): 59 | self.decay = self.get_decay(self.optimization_step) 60 | 61 | # old_all_dataptrs = set() 62 | # for param in new_model.parameters(): 63 | # data_ptr = param.data_ptr() 64 | # if data_ptr != 0: 65 | # old_all_dataptrs.add(data_ptr) 66 | 67 | all_dataptrs = set() 68 | for module, ema_module in zip(new_model.modules(), self.averaged_model.modules()): 69 | for param, ema_param in zip(module.parameters(recurse=False), ema_module.parameters(recurse=False)): 70 | # iterative over immediate parameters only. 71 | if isinstance(param, dict): 72 | raise RuntimeError('Dict parameter not supported') 73 | 74 | # data_ptr = param.data_ptr() 75 | # if data_ptr != 0: 76 | # all_dataptrs.add(data_ptr) 77 | 78 | if isinstance(module, _BatchNorm): 79 | # skip batchnorms 80 | ema_param.copy_(param.to(dtype=ema_param.dtype).data) 81 | elif not param.requires_grad: 82 | ema_param.copy_(param.to(dtype=ema_param.dtype).data) 83 | else: 84 | ema_param.mul_(self.decay) 85 | ema_param.add_(param.data.to(dtype=ema_param.dtype), alpha=1 - self.decay) 86 | 87 | # verify that iterating over module and then parameters is identical to parameters recursively. 88 | # assert old_all_dataptrs == all_dataptrs 89 | self.optimization_step += 1 90 | -------------------------------------------------------------------------------- /models/hub_mixin.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import Dict, Optional, Union 4 | 5 | from huggingface_hub import PyTorchModelHubMixin 6 | from huggingface_hub.constants import (PYTORCH_WEIGHTS_NAME, 7 | SAFETENSORS_SINGLE_FILE) 8 | from huggingface_hub.file_download import hf_hub_download 9 | from huggingface_hub.utils import EntryNotFoundError, is_torch_available 10 | 11 | 12 | if is_torch_available(): 13 | import torch # type: ignore 14 | 15 | 16 | class CompatiblePyTorchModelHubMixin(PyTorchModelHubMixin): 17 | """Mixin class to load Pytorch models from the Hub.""" 18 | def _save_pretrained(self, save_directory: Path) -> None: 19 | """Save weights from a Pytorch model to a local directory.""" 20 | # To bypass saving into safetensor by default 21 | model_to_save = self.module if hasattr(self, "module") else self # type: ignore 22 | torch.save(model_to_save.state_dict(), save_directory / PYTORCH_WEIGHTS_NAME) 23 | 24 | @classmethod 25 | def _from_pretrained( 26 | cls, 27 | *, 28 | model_id: str, 29 | revision: Optional[str], 30 | cache_dir: Optional[Union[str, Path]], 31 | force_download: bool, 32 | proxies: Optional[Dict], 33 | resume_download: Optional[bool], 34 | local_files_only: bool, 35 | token: Union[str, bool, None], 36 | map_location: str = "cpu", 37 | strict: bool = False, 38 | **model_kwargs, 39 | ): 40 | """Load Pytorch pretrained weights and return the loaded model.""" 41 | model = cls(**model_kwargs) 42 | if os.path.isdir(model_id): 43 | print("Loading weights from local directory") 44 | try: 45 | model_file = os.path.join(model_id, SAFETENSORS_SINGLE_FILE) 46 | return cls._load_as_safetensor(model, model_file, map_location, strict) 47 | except FileNotFoundError: 48 | model_file = os.path.join(model_id, PYTORCH_WEIGHTS_NAME) 49 | return cls._load_as_pickle(model, model_file, 
map_location, strict) 50 | else: 51 | try: 52 | model_file = hf_hub_download( 53 | repo_id=model_id, 54 | filename=SAFETENSORS_SINGLE_FILE, 55 | revision=revision, 56 | cache_dir=cache_dir, 57 | force_download=force_download, 58 | proxies=proxies, 59 | resume_download=resume_download, 60 | token=token, 61 | local_files_only=local_files_only, 62 | ) 63 | return cls._load_as_safetensor(model, model_file, map_location, strict) 64 | except EntryNotFoundError: 65 | model_file = hf_hub_download( 66 | repo_id=model_id, 67 | filename=PYTORCH_WEIGHTS_NAME, 68 | revision=revision, 69 | cache_dir=cache_dir, 70 | force_download=force_download, 71 | proxies=proxies, 72 | resume_download=resume_download, 73 | token=token, 74 | local_files_only=local_files_only, 75 | ) 76 | return cls._load_as_pickle(model, model_file, map_location, strict) -------------------------------------------------------------------------------- /models/multimodal_encoder/dinov2_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import AutoConfig, AutoImageProcessor, AutoModel, Dinov2Model 4 | 5 | 6 | class DinoV2VisionTower(nn.Module): 7 | def __init__(self, vision_tower, args, delay_load=False): 8 | super().__init__() 9 | 10 | self.is_loaded = False 11 | 12 | self.vision_tower_name = vision_tower 13 | self.select_feature = getattr(args, 'mm_vision_select_feature', 'patch') 14 | 15 | if not delay_load: 16 | self.load_model() 17 | elif getattr(args, 'unfreeze_mm_vision_tower', False): 18 | self.load_model() 19 | else: 20 | self.cfg_only = AutoConfig.from_pretrained(self.vision_tower_name) 21 | 22 | def load_model(self, device_map=None): 23 | if self.is_loaded: 24 | print('{} is already loaded, `load_model` called again, skipping.'.format(self.vision_tower_name)) 25 | return 26 | 27 | self.image_processor = AutoImageProcessor.from_pretrained(self.vision_tower_name) 28 | self.vision_tower = AutoModel.from_pretrained(self.vision_tower_name, device_map=device_map) 29 | self.vision_tower.requires_grad_(False) # FIXME: 30 | 31 | self.is_loaded = True 32 | 33 | def feature_select(self, image_forward_outs): 34 | image_features = image_forward_outs.last_hidden_state 35 | if self.select_feature == 'patch': 36 | image_features = image_features[:, 1:] # (B, 1369, 1536) 37 | elif self.select_feature == 'cls_patch': 38 | image_features = image_features # (B, 1, 1536) 39 | else: 40 | raise ValueError(f'Unexpected select feature: {self.select_feature}') 41 | return image_features 42 | 43 | @torch.no_grad() 44 | def forward(self, images): 45 | if type(images) is list: 46 | image_features = [] 47 | for image in images: 48 | image_forward_out = self.vision_tower(image.to(device=self.device, dtype=self.dtype).unsqueeze(0)) 49 | image_feature = self.feature_select(image_forward_out).to(image.dtype) 50 | image_features.append(image_feature) 51 | else: 52 | image_forward_outs = self.vision_tower(images.to(device=self.device, dtype=self.dtype)) 53 | image_features = self.feature_select(image_forward_outs).to(images.dtype) 54 | 55 | return image_features 56 | 57 | @property 58 | def dummy_feature(self): 59 | return torch.zeros(1, self.hidden_size, device=self.device, dtype=self.dtype) 60 | 61 | @property 62 | def dtype(self): 63 | return self.vision_tower.dtype 64 | 65 | @property 66 | def device(self): 67 | return self.vision_tower.device 68 | 69 | @property 70 | def config(self): 71 | if self.is_loaded: 72 | return self.vision_tower.config 73 | 
else: 74 | return self.cfg_only 75 | 76 | @property 77 | def hidden_size(self): 78 | return self.config.hidden_size 79 | 80 | @property 81 | def num_patches_per_side(self): 82 | return self.config.image_size // self.config.patch_size 83 | 84 | @property 85 | def num_patches(self): 86 | return (self.config.image_size // self.config.patch_size) ** 2 87 | 88 | -------------------------------------------------------------------------------- /models/multimodal_encoder/siglip_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import AutoConfig, SiglipImageProcessor, SiglipVisionModel 4 | 5 | 6 | class SiglipVisionTower(nn.Module): 7 | def __init__(self, vision_tower, args, delay_load=False): 8 | super().__init__() 9 | 10 | self.is_loaded = False 11 | 12 | self.vision_tower_name = vision_tower 13 | self.select_feature = getattr(args, 'mm_vision_select_feature', 'patch') 14 | 15 | if not delay_load: 16 | self.load_model() 17 | elif getattr(args, 'unfreeze_mm_vision_tower', False): 18 | self.load_model() 19 | else: 20 | self.cfg_only = AutoConfig.from_pretrained(self.vision_tower_name) 21 | 22 | def load_model(self, device_map=None): 23 | if self.is_loaded: 24 | print('{} is already loaded, `load_model` called again, skipping.'.format(self.vision_tower_name)) 25 | return 26 | 27 | self.image_processor = SiglipImageProcessor.from_pretrained(self.vision_tower_name) 28 | self.vision_tower = SiglipVisionModel.from_pretrained(self.vision_tower_name, device_map=device_map) 29 | self.vision_tower.eval() 30 | 31 | self.is_loaded = True 32 | 33 | def feature_select(self, image_forward_outs): 34 | if self.select_feature == 'patch': 35 | image_features = image_forward_outs.last_hidden_state # (B, 729, 1536) 36 | elif self.select_feature == 'cls_patch': 37 | image_features = image_forward_outs.pooler_output # (B, 1, 1536) 38 | else: 39 | raise ValueError(f'Unexpected select feature: {self.select_feature}') 40 | return image_features 41 | 42 | @torch.no_grad() 43 | def forward(self, images): 44 | if type(images) is list: 45 | image_features = [] 46 | for image in images: 47 | image_forward_out = self.vision_tower(image.to(device=self.device, dtype=self.dtype).unsqueeze(0)) 48 | image_feature = self.feature_select(image_forward_out).to(image.dtype) 49 | image_features.append(image_feature) 50 | else: 51 | image_forward_outs = self.vision_tower(images.to(device=self.device, dtype=self.dtype)) 52 | image_features = self.feature_select(image_forward_outs).to(images.dtype) 53 | 54 | return image_features 55 | 56 | @property 57 | def dummy_feature(self): 58 | return torch.zeros(1, self.hidden_size, device=self.device, dtype=self.dtype) 59 | 60 | @property 61 | def dtype(self): 62 | return self.vision_tower.dtype 63 | 64 | @property 65 | def device(self): 66 | return self.vision_tower.device 67 | 68 | @property 69 | def config(self): 70 | if self.is_loaded: 71 | return self.vision_tower.config 72 | else: 73 | return self.cfg_only 74 | 75 | @property 76 | def hidden_size(self): 77 | return self.config.hidden_size 78 | 79 | @property 80 | def num_patches_per_side(self): 81 | return self.config.image_size // self.config.patch_size 82 | 83 | @property 84 | def num_patches(self): 85 | return (self.config.image_size // self.config.patch_size) ** 2 86 | 87 | -------------------------------------------------------------------------------- /pretrain.sh: 
--------------------------------------------------------------------------------
1 | export NCCL_IB_HCA=mlx5_0:1,mlx5_1:1,mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_7:1,mlx5_8:1,mlx5_9:1
2 | export NCCL_IB_DISABLE=0
3 | export NCCL_SOCKET_IFNAME=bond0
4 | export NCCL_DEBUG=INFO
5 | export NCCL_NVLS_ENABLE=0
6 | 
7 | export TEXT_ENCODER_NAME="google/t5-v1_1-xxl"
8 | export VISION_ENCODER_NAME="google/siglip-so400m-patch14-384"
9 | export OUTPUT_DIR="./checkpoints/rdt-pretrain-1b"
10 | export CFLAGS="-I/usr/include"
11 | export LDFLAGS="-L/usr/lib/x86_64-linux-gnu"
12 | export CUTLASS_PATH="/path/to/cutlass"
13 | 
14 | export WANDB_PROJECT="robotics_diffusion_transformer"
15 | 
16 | if [ ! -d "$OUTPUT_DIR" ]; then
17 |     mkdir "$OUTPUT_DIR"
18 |     echo "Folder '$OUTPUT_DIR' created"
19 | else
20 |     echo "Folder '$OUTPUT_DIR' already exists"
21 | fi
22 | 
23 | # To run on a single node/machine
24 | # accelerate launch main.py \
25 | #     --deepspeed="./configs/zero2.json" \
26 | #     ...
27 | 
28 | deepspeed --hostfile=hostfile.txt main.py \
29 |     --deepspeed="./configs/zero2.json" \
30 |     --pretrained_text_encoder_name_or_path=$TEXT_ENCODER_NAME \
31 |     --pretrained_vision_encoder_name_or_path=$VISION_ENCODER_NAME \
32 |     --output_dir=$OUTPUT_DIR \
33 |     --train_batch_size=32 \
34 |     --sample_batch_size=64 \
35 |     --max_train_steps=1000000 \
36 |     --checkpointing_period=1000 \
37 |     --sample_period=500 \
38 |     --checkpoints_total_limit=40 \
39 |     --lr_scheduler="constant" \
40 |     --learning_rate=1e-4 \
41 |     --mixed_precision="bf16" \
42 |     --dataloader_num_workers=8 \
43 |     --dataset_type="pretrain" \
44 |     --report_to=wandb
45 | 
46 | # Use this to resume training from some previous checkpoint
47 | # --resume_from_checkpoint="checkpoint-1000" \
48 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | packaging==24.0
2 | wandb==0.17.0
3 | deepspeed==0.14.2
4 | accelerate==0.30.1
5 | diffusers==0.27.2
6 | timm==1.0.3
7 | transformers==4.41.0
8 | sentencepiece==0.2.0
9 | h5py==3.11.0
10 | opencv-python==4.9.0.80
11 | imgaug==0.4.0
--------------------------------------------------------------------------------
/requirements_data.txt:
--------------------------------------------------------------------------------
1 | tfds-nightly==4.9.4.dev202402070044
2 | gsutil==5.27
3 | tensorflow==2.15.0.post1
4 | pillow==10.2.0
5 | pyyaml==6.0.1
6 | opencv-python==4.9.0.80
7 | tensorflow-graphics==2021.12.3
8 | imageio==2.34.0
9 | imageio-ffmpeg==0.4.9
--------------------------------------------------------------------------------
/scripts/encode_lang.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import torch
4 | import yaml
5 | 
6 | from models.multimodal_encoder.t5_encoder import T5Embedder
7 | 
8 | 
9 | GPU = 0
10 | MODEL_PATH = "google/t5-v1_1-xxl"
11 | CONFIG_PATH = "configs/base.yaml"
12 | SAVE_DIR = "outs/"
13 | 
14 | # Modify this to your task name and instruction
15 | TASK_NAME = "handover_pan"
16 | INSTRUCTION = "Pick up the black marker on the right and put it into the packaging box on the left."
17 | 
18 | # Note: if your GPU VRAM is less than 24GB,
19 | # it is recommended to enable offloading by specifying an offload directory.
20 | OFFLOAD_DIR = None  # Specify your offload directory here, ensuring the directory exists.
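   | # e.g., OFFLOAD_DIR = "./offload"  # hypothetical path; create the folder beforehand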
21 | 22 | def main(): 23 | with open(CONFIG_PATH, "r") as fp: 24 | config = yaml.safe_load(fp) 25 | 26 | device = torch.device(f"cuda:{GPU}") 27 | text_embedder = T5Embedder( 28 | from_pretrained=MODEL_PATH, 29 | model_max_length=config["dataset"]["tokenizer_max_length"], 30 | device=device, 31 | use_offload_folder=OFFLOAD_DIR 32 | ) 33 | tokenizer, text_encoder = text_embedder.tokenizer, text_embedder.model 34 | 35 | tokens = tokenizer( 36 | INSTRUCTION, return_tensors="pt", 37 | padding="longest", 38 | truncation=True 39 | )["input_ids"].to(device) 40 | 41 | tokens = tokens.view(1, -1) 42 | with torch.no_grad(): 43 | pred = text_encoder(tokens).last_hidden_state.detach().cpu() 44 | 45 | save_path = os.path.join(SAVE_DIR, f"{TASK_NAME}.pt") 46 | # We save the embeddings in a dictionary format 47 | torch.save({ 48 | "name": TASK_NAME, 49 | "instruction": INSTRUCTION, 50 | "embeddings": pred 51 | }, save_path 52 | ) 53 | 54 | print(f'\"{INSTRUCTION}\" from \"{TASK_NAME}\" is encoded by \"{MODEL_PATH}\" into shape {pred.shape} and saved to \"{save_path}\"') 55 | 56 | 57 | if __name__ == "__main__": 58 | main() 59 | -------------------------------------------------------------------------------- /scripts/encode_lang_batch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import torch 5 | import yaml 6 | from tqdm import tqdm 7 | 8 | from models.multimodal_encoder.t5_encoder import T5Embedder 9 | 10 | 11 | GPU = 0 12 | MODEL_PATH = "google/t5-v1_1-xxl" 13 | CONFIG_PATH = "configs/base.yaml" 14 | # Modify the TARGET_DIR to your dataset path 15 | TARGET_DIR = "data/datasets/agilex/tfrecords/" 16 | 17 | # Note: if your GPU VRAM is less than 24GB, 18 | # it is recommended to enable offloading by specifying an offload directory. 19 | OFFLOAD_DIR = None # Specify your offload directory here, ensuring the directory exists. 
20 | 21 | def main(): 22 | with open(CONFIG_PATH, "r") as fp: 23 | config = yaml.safe_load(fp) 24 | 25 | device = torch.device(f"cuda:{GPU}") 26 | text_embedder = T5Embedder( 27 | from_pretrained=MODEL_PATH, 28 | model_max_length=config["dataset"]["tokenizer_max_length"], 29 | device=device, 30 | use_offload_folder=OFFLOAD_DIR 31 | ) 32 | tokenizer, text_encoder = text_embedder.tokenizer, text_embedder.model 33 | 34 | # Get all the task paths 35 | task_paths = [] 36 | for sub_dir in os.listdir(TARGET_DIR): 37 | middle_dir = os.path.join(TARGET_DIR, sub_dir) 38 | if os.path.isdir(middle_dir): 39 | for task_dir in os.listdir(middle_dir): 40 | task_path = os.path.join(middle_dir, task_dir) 41 | if os.path.isdir(task_path): 42 | task_paths.append(task_path) 43 | 44 | # For each task, encode the instructions 45 | for task_path in tqdm(task_paths): 46 | # Load the instructions corresponding to the task from the directory 47 | with open(os.path.join(task_path, 'expanded_instruction_gpt-4-turbo.json'), 'r') as f_instr: 48 | instruction_dict = json.load(f_instr) 49 | instructions = [instruction_dict['instruction']] + instruction_dict['simplified_instruction'] + \ 50 | instruction_dict['expanded_instruction'] 51 | 52 | # Encode the instructions 53 | tokenized_res = tokenizer( 54 | instructions, return_tensors="pt", 55 | padding="longest", 56 | truncation=True 57 | ) 58 | tokens = tokenized_res["input_ids"].to(device) 59 | attn_mask = tokenized_res["attention_mask"].to(device) 60 | 61 | with torch.no_grad(): 62 | text_embeds = text_encoder( 63 | input_ids=tokens, 64 | attention_mask=attn_mask 65 | )["last_hidden_state"].detach().cpu() 66 | 67 | attn_mask = attn_mask.cpu().bool() 68 | 69 | # Save the embeddings for training use 70 | for i in range(len(instructions)): 71 | text_embed = text_embeds[i][attn_mask[i]] 72 | save_path = os.path.join(task_path, f"lang_embed_{i}.pt") 73 | torch.save(text_embed, save_path) 74 | 75 | if __name__ == "__main__": 76 | main() 77 | -------------------------------------------------------------------------------- /train/image_corrupt.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.simplefilter(action='ignore', category=FutureWarning) 3 | 4 | import numpy as np 5 | np.bool = np.bool_ 6 | import imgaug.augmenters as iaa 7 | from PIL import Image 8 | 9 | 10 | # Define our sequence of augmentation steps that will be applied to every image. 11 | seq = iaa.Sequential( 12 | [ 13 | # Execute one of the following noise augmentations 14 | iaa.OneOf([ 15 | iaa.AdditiveGaussianNoise( 16 | loc=0, scale=(0.0, 0.05*255), per_channel=0.5 17 | ), 18 | iaa.AdditiveLaplaceNoise(scale=(0.0, 0.05*255), per_channel=0.5), 19 | iaa.AdditivePoissonNoise(lam=(0.0, 0.05*255), per_channel=0.5) 20 | ]), 21 | 22 | # Execute one or none of the following blur augmentations 23 | iaa.SomeOf((0, 1), [ 24 | iaa.OneOf([ 25 | iaa.GaussianBlur((0, 3.0)), 26 | iaa.AverageBlur(k=(2, 7)), 27 | iaa.MedianBlur(k=(3, 11)), 28 | ]), 29 | iaa.MotionBlur(k=(3, 36)), 30 | ]), 31 | ], 32 | # do all of the above augmentations in random order 33 | random_order=True 34 | ) 35 | 36 | 37 | def image_corrupt(image: Image): 38 | image_arr = np.array(image) 39 | image_arr = image_arr[None, ...] 40 | 41 | image_arr = seq(images=image_arr) 42 | 43 | image = Image.fromarray(image_arr[0]) 44 | return image 45 | --------------------------------------------------------------------------------