├── .gitignore
├── LICENSE
├── README.md
├── main
│   ├── cfg
│   │   ├── config.yaml
│   │   ├── distillation_config.yaml
│   │   ├── distillation_student_arch
│   │   │   ├── pointnet.yaml
│   │   │   └── rnn_pointnet.yaml
│   │   ├── residual_config.yaml
│   │   ├── residual_policy_arch
│   │   │   ├── perceiver.yaml
│   │   │   └── pointnet.yaml
│   │   ├── residual_policy_task
│   │   │   ├── default.yaml
│   │   │   └── insert.yaml
│   │   ├── rl_train
│   │   │   ├── InsertFullPPO.yaml
│   │   │   ├── InsertSinglePPO.yaml
│   │   │   ├── LiftLeanedLegPPO.yaml
│   │   │   ├── ReachAndGraspFullPPO.yaml
│   │   │   ├── ReachAndGraspSinglePPO.yaml
│   │   │   ├── ScrewFullPPO.yaml
│   │   │   ├── ScrewSinglePPO.yaml
│   │   │   └── StabilizePPO.yaml
│   │   └── task
│   │       ├── InsertFull.yaml
│   │       ├── InsertFullPCD.yaml
│   │       ├── InsertSingle.yaml
│   │       ├── InsertSinglePCD.yaml
│   │       ├── LiftLeanedLeg.yaml
│   │       ├── LiftLeanedLegPCD.yaml
│   │       ├── ReachAndGraspFull.yaml
│   │       ├── ReachAndGraspFullPCD.yaml
│   │       ├── ReachAndGraspSingle.yaml
│   │       ├── ReachAndGraspSinglePCD.yaml
│   │       ├── ScrewFull.yaml
│   │       ├── ScrewFullPCD.yaml
│   │       ├── ScrewSingle.yaml
│   │       ├── ScrewSinglePCD.yaml
│   │       ├── Stabilize.yaml
│   │       └── StabilizePCD.yaml
│   ├── correction_data_collection.py
│   ├── distillation
│   │   ├── test.py
│   │   └── train.py
│   ├── integrated_deployment.py
│   ├── residual
│   │   └── train.py
│   └── rl
│       └── train.py
├── media
│   ├── SUSig-red.png
│   └── method_overview.gif
├── requirements.txt
├── setup.py
└── transic
    ├── __init__.py
    ├── distillation
    │   ├── __init__.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   ├── collate.py
    │   │   ├── data_module.py
    │   │   ├── dataset.py
    │   │   └── dummy.py
    │   ├── module.py
    │   └── policy
    │       ├── __init__.py
    │       ├── pointnet_policy.py
    │       └── rnn_pointnet_policy.py
    ├── learn
    │   ├── __init__.py
    │   ├── lightning.py
    │   ├── lr_schedule.py
    │   ├── optimizer_group.py
    │   └── policy
    │       ├── __init__.py
    │       ├── base.py
    │       └── distributions.py
    ├── nn
    │   ├── __init__.py
    │   ├── features
    │   │   ├── __init__.py
    │   │   ├── embedding.py
    │   │   ├── fusion.py
    │   │   ├── identity.py
    │   │   └── pointcloud
    │   │       ├── __init__.py
    │   │       ├── pointnet.py
    │   │       └── set_transformer
    │   │           ├── __init__.py
    │   │           ├── set_transformer.py
    │   │           └── set_xf_pcd_encoder.py
    │   └── mlp.py
    ├── real_world
    │   ├── __init__.py
    │   └── obs.py
    ├── residual
    │   ├── __init__.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   ├── collate.py
    │   │   ├── data_module.py
    │   │   └── dataset.py
    │   ├── module.py
    │   └── policy
    │       ├── __init__.py
    │       ├── perceiver_residual_policy.py
    │       └── pointnet_residual_policy.py
    ├── rl
    │   ├── __init__.py
    │   ├── agent.py
    │   ├── base.py
    │   ├── models.py
    │   ├── moving_avg.py
    │   ├── network_builder.py
    │   ├── player.py
    │   └── runner.py
    └── utils
        ├── __init__.py
        ├── array.py
        ├── config_utils.py
        ├── datadict.py
        ├── misc_utils.py
        ├── reformat.py
        ├── rlgames_utils.py
        ├── torch_utils.py
        ├── tree_utils.py
        ├── utils.py
        └── wandb_utils.py
/.gitignore: -------------------------------------------------------------------------------- 1 | *.ipynb 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | .static_storage/ 57 | .media/ 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | # env/ 89 | venv/ 90 | # ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | # Mac 108 | .DS_Store 109 | 110 | # MuJoCo License key 111 | mjkey.txt 112 | 113 | .mujocomanip_temp_model.xml 114 | 115 | # Python IDE 116 | .idea 117 | 118 | # Locally generated files 119 | dump.rdb 120 | *.local.ipynb 121 | runs/ 122 | temp* 123 | debug_* 124 | *.swp 125 | 126 | .tabnine_root 127 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Yunfan Jiang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /main/cfg/config.yaml: -------------------------------------------------------------------------------- 1 | # set default task and default training config based on task 2 | defaults: 3 | - task: ??? 4 | - rl_train: ${find_rl_train_config:${task}} 5 | - override hydra/job_logging: disabled 6 | - _self_ 7 | 8 | # Task name - used to pick the class to load 9 | task_name: ${task.name} 10 | # experiment name. 
defaults to name of training config 11 | experiment: '' 12 | 13 | # if set to positive integer, overrides the default number of environments 14 | num_envs: '' 15 | 16 | # seed - set to -1 to choose random seed 17 | seed: -1 18 | # set to True for deterministic performance 19 | torch_deterministic: False 20 | 21 | # set the maximum number of learning iterations to train for. overrides default per-environment setting 22 | max_iterations: 9999999999999 # train forever 23 | 24 | ## Device config 25 | # 'physx' or 'flex' 26 | physics_engine: 'physx' 27 | # whether to use cpu or gpu pipeline 28 | pipeline: 'gpu' 29 | # device for running physics simulation 30 | sim_device: 'cuda:0' 31 | # device to run RL 32 | rl_device: 'cuda:0' 33 | graphics_device_id: 0 34 | 35 | ## PhysX arguments 36 | num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only. 37 | solver_type: 1 # 0: pgs, 1: tgs 38 | num_subscenes: 4 # Splits the simulation into N physics scenes and runs each one in a separate thread 39 | 40 | # RLGames Arguments 41 | # test - if set, run policy in inference mode (requires setting checkpoint to load) 42 | test: False 43 | # save rollouts config, used for distillation 44 | save_rollouts: False 45 | save_successful_rollouts_only: True 46 | num_rollouts_to_save: 10000 47 | min_episode_length: 20 48 | # used to set checkpoint path 49 | checkpoint: '' 50 | from_ckpt_epoch: false 51 | # set sigma when restoring network 52 | sigma: '' 53 | # set to True to use multi-gpu training 54 | multi_gpu: False 55 | 56 | wandb_activate: False 57 | wandb_group: '' 58 | wandb_name: ${rl_train.params.config.name} 59 | wandb_entity: null # set to your wandb entity if using wandb 60 | wandb_project: null # set to your wandb project if using wandb 61 | wandb_tags: [] 62 | wandb_logcode_dir: '' 63 | 64 | capture_video: False 65 | n_parallel_recorders: 8 66 | n_successful_videos_to_record: 50 67 | display: False 68 | headless: True 69 | 70 | # set the directory where the output files get saved 71 | hydra: 72 | output_subdir: null 73 | run: 74 | dir: . 75 | -------------------------------------------------------------------------------- /main/cfg/distillation_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - config 3 | - _self_ # all below configs will override this conf.yaml 4 | - distillation_student_arch: ??? 5 | 6 | run_name: "${arch_name}_lr${lr}_wd${wd}_b${bs}" 7 | exp_root_dir: ??? 8 | 9 | arch_name: ??? # filled by distillation_student_arch 10 | 11 | # ====== main cfg ====== 12 | seed: -1 13 | gpus: 1 14 | lr: 1e-4 15 | wd: 0.0 16 | bs: 32 17 | sim_device: 0 18 | rl_device: 0 19 | graphics_device_id: 0 20 | eval_interval: 5 # every N epochs 21 | data_path: ??? 22 | matched_scene_data_path: ??? 23 | 24 | # ------ logging ------ 25 | use_wandb: true 26 | wandb_project: ??? 
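# NOTE: `???` marks a mandatory Hydra value; config composition fails unless it
# is supplied at launch. A plausible invocation (flags inferred from this
# config, not verified against main/distillation/train.py):
#   python main/distillation/train.py task=InsertSinglePCD distillation_student_arch=pointnet exp_root_dir=<dir> data_path=<teacher-rollouts> matched_scene_data_path=<matched-scenes> wandb_project=<project>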
27 | wandb_run_name: ${run_name} 28 | 29 | # ------ module ------ 30 | module: 31 | _target_: transic.distillation.module.DistillationModule 32 | # ====== policies ====== 33 | prop_obs_keys: 34 | - q 35 | - cos_q 36 | - sin_q 37 | - eef_pos 38 | - eef_quat 39 | - gripper_width 40 | pcd_sample_points: ${task.env.pcdN} 41 | # ====== learning ====== 42 | lr: ${lr} 43 | optimizer: "adam" 44 | weight_decay: ${wd} 45 | # ====== env creation ====== 46 | rlg_task_cfg: ${task} 47 | num_envs: ${num_envs} 48 | display: ${display} 49 | # ====== training data augmentation ====== 50 | enable_pcd_augmentation: true 51 | pcd_aug_apply_prob: 0.4 52 | pcd_aug_random_trans_high: [0.04, 0.04, 0.04] 53 | pcd_aug_random_trans_low: [-0.04, -0.04, -0.04] 54 | pcd_aug_jitter_ratio: 0.1 55 | pcd_aug_jitter_sigma: 0.01 56 | pcd_aug_jitter_low: -0.015 57 | pcd_aug_jitter_high: 0.015 58 | enable_prop_augmentation: true 59 | prop_aug_scale_sigma: 0.1 60 | prop_aug_scale_low: -0.3 61 | prop_aug_scale_high: 0.3 62 | # ====== eval ====== 63 | n_eval_episodes: 1000 64 | # ====== pcd regularization ====== 65 | enable_pcd_matched_scenes_regularization: true 66 | pcd_matched_scenes_reg_weight: 1e-3 67 | # ====== device ====== 68 | sim_device: ${sim_device} 69 | rl_device: ${rl_device} 70 | graphics_device_id: ${graphics_device_id} 71 | 72 | data_module: 73 | _target_: transic.distillation.data.DistillationDataModule 74 | data_path: ${data_path} 75 | matched_scene_data_path: ${matched_scene_data_path} 76 | ctx_len: -1 # -1 means not using sequence policy at all 77 | skip_first_n_steps: 0 78 | sampled_pcd_points: ${task.env.pcdN} 79 | refresh_pcd_sampling_idxs_interval: 0.1 80 | real_pcd_x_limits: [0.2, 0.7] 81 | real_pcd_y_limits: [-0.3, 0.3] 82 | real_pcd_z_min: 0.01 83 | batch_size: ${bs} 84 | dataloader_num_workers: 64 85 | seed: ${seed} 86 | 87 | trainer: 88 | _target_: pytorch_lightning.Trainer 89 | accelerator: "gpu" 90 | devices: ${gpus} 91 | benchmark: true # enables cudnn.benchmark 92 | accumulate_grad_batches: 1 93 | num_sanity_val_steps: 0 94 | max_epochs: 999999999 95 | val_check_interval: null 96 | check_val_every_n_epoch: ${eval_interval} 97 | gradient_clip_val: 1.0 98 | checkpoint: # this sub-dict will be popped to send to ModelCheckpoint as args 99 | - filename: "s{step}-val_sr{val/success_rate:.5f}" 100 | save_top_k: 5 101 | save_last: true 102 | monitor: "val/success_rate" 103 | mode: max 104 | auto_insert_metric_name: false # prevent creating subfolder caused by the slash 105 | 106 | # ------------- Testing --------------- 107 | test: 108 | ckpt_path: null 109 | 110 | hydra: 111 | job: 112 | chdir: true 113 | run: 114 | dir: "." 
115 | output_subdir: null -------------------------------------------------------------------------------- /main/cfg/distillation_student_arch/pointnet.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | arch_name: pointnet 4 | module: 5 | student_policy: 6 | _target_: transic.distillation.policy.PointNetPolicy 7 | point_channels: 3 8 | subtract_point_mean: false 9 | add_ee_embd: true 10 | ee_embd_dim: 128 11 | pointnet_output_dim: 256 12 | pointnet_hidden_dim: 256 13 | pointnet_hidden_depth: 2 14 | pointnet_activation: "gelu" 15 | prop_input_dim: 29 16 | feature_fusion_hidden_depth: 1 17 | feature_fusion_hidden_dim: 512 18 | feature_fusion_output_dim: 512 19 | feature_fusion_activation: "relu" 20 | feature_fusion_add_input_activation: false 21 | feature_fusion_add_output_activation: false 22 | action_dim: 8 23 | action_net_gmm_n_modes: 5 24 | action_net_hidden_dim: 128 25 | action_net_hidden_depth: 3 26 | action_net_activation: "relu" 27 | deterministic_inference: true 28 | gmm_low_noise_eval: true 29 | -------------------------------------------------------------------------------- /main/cfg/distillation_student_arch/rnn_pointnet.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | arch_name: rnn_pointnet 4 | rnn_horizon: 5 5 | module: 6 | student_policy: 7 | _target_: transic.distillation.policy.RNNPointNetPolicy 8 | point_channels: 3 9 | subtract_point_mean: false 10 | add_ee_embd: true 11 | ee_embd_dim: 128 12 | pointnet_output_dim: 256 13 | pointnet_hidden_dim: 256 14 | pointnet_hidden_depth: 2 15 | pointnet_activation: "gelu" 16 | prop_input_dim: 29 17 | feature_fusion_hidden_depth: 1 18 | feature_fusion_hidden_dim: 512 19 | feature_fusion_output_dim: 512 20 | feature_fusion_activation: "relu" 21 | feature_fusion_add_input_activation: false 22 | feature_fusion_add_output_activation: false 23 | rnn_type: "lstm" 24 | rnn_n_layers: 2 25 | rnn_hidden_dim: 512 26 | ctx_len: ${rnn_horizon} 27 | action_dim: 8 28 | action_net_gmm_n_modes: 5 29 | action_net_hidden_dim: 128 30 | action_net_hidden_depth: 3 31 | action_net_activation: "relu" 32 | deterministic_inference: true 33 | gmm_low_noise_eval: true 34 | 35 | data_module: 36 | ctx_len: ${rnn_horizon} -------------------------------------------------------------------------------- /main/cfg/residual_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ # all below configs will override this conf.yaml 3 | - residual_policy_arch: ??? 4 | - residual_policy_task: ??? 5 | 6 | run_name: "residual_policy_${arch_name}_lr${lr}_wd${wd}_b${bs}" 7 | exp_root_dir: ??? 8 | 9 | arch_name: ??? # filled by residual_policy_arch 10 | 11 | # ====== main cfg ====== 12 | seed: -1 13 | gpus: 1 14 | lr: 1e-4 15 | wd: 0.0 16 | bs: 32 17 | vbs: 32 18 | data_dir: ??? 19 | eval_interval: 1 20 | 21 | # ------ logging ------ 22 | use_wandb: true 23 | wandb_project: ??? 
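# As in distillation_config.yaml, every `???` field here (residual_policy_arch,
# residual_policy_task, exp_root_dir, data_dir, wandb_project) must be supplied
# at launch, e.g. (hypothetical invocation):
#   python main/residual/train.py residual_policy_arch=pointnet residual_policy_task=default exp_root_dir=<dir> data_dir=<correction-data> wandb_project=<project>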
24 | wandb_run_name: ${run_name} 25 | 26 | # ------ module ------ 27 | module: 28 | _target_: transic.residual.module.ResidualPolicyModule 29 | include_robot_gripper_action_input: true 30 | learn_gripper_action: true 31 | # ====== learning ====== 32 | lr: ${lr} 33 | use_cosine_lr: true 34 | lr_warmup_steps: 1000 35 | lr_cosine_steps: 100000 36 | lr_cosine_min: 1e-6 37 | optimizer: "adam" 38 | weight_decay: ${wd} 39 | intervention_pred_loss_weight: 1.0 40 | # ====== pcd sampling ====== 41 | pcd_downsample_N: null 42 | 43 | data_module: 44 | _target_: transic.residual.data.ResidualDataModule 45 | data_dir: ${data_dir} 46 | include_grasp_action: true 47 | gripper_close_width: 0.025 48 | gripper_open_width: 0.08 49 | variable_len_pcd_handle_strategy: "truncate" 50 | seed: ${seed} 51 | batch_size: ${bs} 52 | val_batch_size: ${vbs} 53 | train_portion: 0.9 54 | dataloader_num_workers: 8 55 | 56 | trainer: 57 | _target_: pytorch_lightning.Trainer 58 | accelerator: "gpu" 59 | devices: ${gpus} 60 | benchmark: true # enables cudnn.benchmark 61 | accumulate_grad_batches: 1 62 | num_sanity_val_steps: 0 63 | max_epochs: 999999999 64 | val_check_interval: null 65 | check_val_every_n_epoch: ${eval_interval} 66 | gradient_clip_val: 1.0 67 | checkpoint: # this sub-dict will be popped to send to ModelCheckpoint as args 68 | - filename: "epoch{epoch}-val_loss{val/loss:.5f}" 69 | save_top_k: 5 70 | save_last: true 71 | monitor: "val/loss" 72 | mode: min 73 | auto_insert_metric_name: false # prevent creating subfolder caused by the slash 74 | - filename: "epoch{epoch}-val_intervention_acc{val/intervention_acc:.5f}" 75 | save_top_k: 3 76 | save_last: false 77 | monitor: "val/intervention_acc" 78 | mode: max 79 | auto_insert_metric_name: false # prevent creating subfolder caused by the slash 80 | 81 | # ---------------------------- 82 | 83 | hydra: 84 | job: 85 | chdir: true 86 | run: 87 | dir: "." 
88 | output_subdir: null -------------------------------------------------------------------------------- /main/cfg/residual_policy_arch/perceiver.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | bs: 8 4 | 5 | arch_name: perceiver_mlp 6 | module: 7 | residual_policy: 8 | _target_: transic.residual.policy.PerceiverResidualPolicy 9 | point_channels: 3 10 | subtract_point_mean: false 11 | add_ee_embd: true 12 | ee_embd_dim: 128 13 | set_xf_hidden_dim: 256 14 | set_xf_num_heads: 8 15 | set_xf_num_queries: 8 16 | set_xf_pool_type: concat 17 | set_xf_layer_norm: false 18 | prop_input_dim: 29 19 | robot_policy_output_dim: 7 20 | include_robot_policy_gripper_action_input: true 21 | robot_policy_gripper_action_embd_dim: 64 22 | feature_fusion_hidden_depth: 1 23 | feature_fusion_hidden_dim: 512 24 | feature_fusion_output_dim: 512 25 | feature_fusion_activation: "relu" 26 | feature_fusion_add_input_activation: false 27 | feature_fusion_add_output_activation: false 28 | action_dim: 8 29 | action_net_gmm_n_modes: 5 30 | action_net_hidden_dim: 128 31 | action_net_hidden_depth: 3 32 | action_net_activation: "relu" 33 | intervention_head_hidden_dim: 128 34 | intervention_head_hidden_depth: 3 35 | intervention_head_activation: "relu" 36 | deterministic_inference: true 37 | gmm_low_noise_eval: true 38 | update_intervention_head_only: false 39 | ckpt_path_if_update_intervention_head_only: null 40 | 41 | data_module: 42 | ctx_len: 20 # this is not required by the mlp policy, but just for consistency 43 | variable_len_pcd_handle_strategy: "pad" # for set transformer, we can use full PCD -------------------------------------------------------------------------------- /main/cfg/residual_policy_arch/pointnet.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | bs: 8 4 | 5 | arch_name: pointnet_mlp 6 | module: 7 | residual_policy: 8 | _target_: transic.residual.policy.PointNetResidualPolicy 9 | point_channels: 3 10 | subtract_point_mean: false 11 | add_ee_embd: true 12 | ee_embd_dim: 128 13 | pointnet_output_dim: 256 14 | pointnet_hidden_dim: 256 15 | pointnet_hidden_depth: 2 16 | pointnet_activation: "gelu" 17 | prop_input_dim: 29 18 | robot_policy_output_dim: 7 19 | include_robot_policy_gripper_action_input: true 20 | robot_policy_gripper_action_embd_dim: 64 21 | feature_fusion_hidden_depth: 1 22 | feature_fusion_hidden_dim: 512 23 | feature_fusion_output_dim: 512 24 | feature_fusion_activation: "relu" 25 | feature_fusion_add_input_activation: false 26 | feature_fusion_add_output_activation: false 27 | action_dim: 8 28 | action_net_gmm_n_modes: 5 29 | action_net_hidden_dim: 128 30 | action_net_hidden_depth: 3 31 | action_net_activation: "relu" 32 | intervention_head_hidden_dim: 128 33 | intervention_head_hidden_depth: 3 34 | intervention_head_activation: "relu" 35 | deterministic_inference: true 36 | gmm_low_noise_eval: true 37 | update_intervention_head_only: false 38 | ckpt_path_if_update_intervention_head_only: null 39 | 40 | data_module: 41 | ctx_len: 20 # this is not required by the mlp policy, but just for consistency -------------------------------------------------------------------------------- /main/cfg/residual_policy_task/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | -------------------------------------------------------------------------------- /main/cfg/residual_policy_task/insert.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | module: 4 | # ====== pcd sampling ====== 5 | pcd_downsample_N: 1000 6 | include_robot_gripper_action_input: false 7 | learn_gripper_action: false 8 | residual_policy: 9 | include_robot_policy_gripper_action_input: false 10 | action_dim: 7 11 | 12 | data_module: 13 | include_grasp_action: false 14 | -------------------------------------------------------------------------------- /main/cfg/rl_train/InsertFullPPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 130 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:InsertFull,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 4096 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 -------------------------------------------------------------------------------- /main/cfg/rl_train/InsertSinglePPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 76 30 | proprioception: 31 | _target_: 
transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:InsertSingle,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 16384 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 -------------------------------------------------------------------------------- /main/cfg/rl_train/LiftLeanedLegPPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 136 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:LiftLeanedLeg,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | 
save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 4096 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 -------------------------------------------------------------------------------- /main/cfg/rl_train/ReachAndGraspFullPPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 58 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:ReachAndGraspFull,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 4096 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 -------------------------------------------------------------------------------- /main/cfg/rl_train/ReachAndGraspSinglePPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 58 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | 
output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:ReachAndGraspSingle,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 100 77 | save_frequency: 50 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 16384 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 90 | -------------------------------------------------------------------------------- /main/cfg/rl_train/ScrewFullPPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 74 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:ScrewFull,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | 
entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 16384 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 90 | -------------------------------------------------------------------------------- /main/cfg/rl_train/ScrewSinglePPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 74 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:ScrewSingle,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 16384 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 90 | -------------------------------------------------------------------------------- /main/cfg/rl_train/StabilizePPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 145 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 36 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: 
false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:Stabilize,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 4096 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 90 | -------------------------------------------------------------------------------- /main/cfg/task/InsertFullPCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: InsertFullPCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:2048,${...num_envs}} 10 | episodeLength: 100 11 | 12 | furniture: "square_table" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.25 21 | 22 | distanceReward: 0 23 | successReward: 1 24 | 25 | selectedLegIdx: ??? 
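# selectedLegIdx is mandatory and presumably selects which of the four table
# legs the insertion targets; supply it at launch, e.g.
# task.env.selectedLegIdx=3 (example value; the valid index range is not
# verified here).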
26 | 27 | aggregateMode: 3 28 | 29 | actionScale: 1.0 30 | useQuatRot: false 31 | 32 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 33 | 34 | pcAugmentation: 35 | enabled: true 36 | applyP: 0.4 37 | randomTransLow: [-0.04, -0.04, -0.04] 38 | randomTransHigh: [0.04, 0.04, 0.04] 39 | 40 | jitterRatio: 0.1 41 | jitterSigma: 0.01 42 | jitterHigh: 0.015 43 | jitterLow: -0.015 44 | 45 | propObsDim: 29 46 | obsKeys: 47 | - q 48 | - cos_q 49 | - sin_q 50 | - eef_pos 51 | - eef_quat 52 | - gripper_width 53 | 54 | privilegedObsDim: 130 55 | privilegedObsKeys: 56 | - square_table_top_pos 57 | - square_table_top_rot 58 | - square_table_top_vel 59 | - square_table_leg1_pos 60 | - square_table_leg1_rot 61 | - square_table_leg1_vel 62 | - square_table_leg2_pos 63 | - square_table_leg2_rot 64 | - square_table_leg2_vel 65 | - square_table_leg3_pos 66 | - square_table_leg3_rot 67 | - square_table_leg3_vel 68 | - square_table_leg4_pos 69 | - square_table_leg4_rot 70 | - square_table_leg4_vel 71 | - obstacle_front_pos 72 | - obstacle_left_pos 73 | - obstacle_right_pos 74 | - eef_vel 75 | - ftip_center_pos 76 | - eef_lf_pos 77 | - eef_rf_pos 78 | - q 79 | - cos_q 80 | - sin_q 81 | - dq 82 | - q_gripper 83 | - target_xyz 84 | 85 | # set to True if you use camera sensors in the environment 86 | enableCameraSensors: False 87 | 88 | sim: 89 | dt: 0.01667 # 1/60 90 | substeps: 2 91 | up_axis: "z" 92 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 93 | gravity: [0.0, 0.0, -9.81] 94 | physx: 95 | num_threads: ${....num_threads} 96 | solver_type: ${....solver_type} 97 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 98 | num_position_iterations: 8 99 | num_velocity_iterations: 1 100 | contact_offset: 0.005 101 | rest_offset: 0.0 102 | bounce_threshold_velocity: 0.2 103 | max_depenetration_velocity: 1000.0 104 | default_buffer_size_multiplier: 5.0 105 | max_gpu_contact_pairs: 1048576 # 1024*1024 106 | num_subscenes: ${....num_subscenes} 107 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 108 | 109 | task: 110 | randomize: False 111 | -------------------------------------------------------------------------------- /main/cfg/task/InsertSingle.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: InsertSingle 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 
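# `resolve_default`, `eq`, `contains`, and `if` used in this file are custom
# OmegaConf resolvers registered by the training entry point (assumed to follow
# the IsaacGymEnvs convention), roughly:
#   OmegaConf.register_new_resolver("resolve_default", lambda default, arg: default if arg == "" else arg)
#   OmegaConf.register_new_resolver("eq", lambda x, y: x.lower() == y.lower())
#   OmegaConf.register_new_resolver("contains", lambda x, y: x.lower() in y.lower())
# Under these semantics, numEnvs below resolves to 8192 unless a top-level
# num_envs is given.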
8 | env: 9 | numEnvs: ${resolve_default:8192,${...num_envs}} 10 | episodeLength: 100 11 | 12 | furniture: "table_with_one_leg" 13 | 14 | clipObservations: 5.0 15 | clipActions: 1.0 16 | 17 | frankaDofNoise: 0.25 18 | 19 | distanceReward: 0.1 20 | successReward: 100.0 21 | rotationNoise: 60 22 | 23 | aggregateMode: 3 24 | 25 | actionScale: 1.0 26 | useQuatRot: false 27 | 28 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 29 | 30 | # for distillation 31 | propDumpInfo: 32 | q: 7 33 | eef_pos: 3 34 | eef_quat: 4 35 | gripper_width: 1 36 | 37 | propObsDim: 29 38 | obsKeys: 39 | - q 40 | - cos_q 41 | - sin_q 42 | - eef_pos 43 | - eef_quat 44 | - gripper_width 45 | 46 | privilegedObsDim: 76 47 | privilegedObsKeys: 48 | - square_table_top_pos 49 | - square_table_top_rot 50 | - square_table_top_vel 51 | - square_table_leg4_pos 52 | - square_table_leg4_rot 53 | - square_table_leg4_vel 54 | - eef_vel 55 | - ftip_center_pos 56 | - obstacle_front_pos 57 | - obstacle_left_pos 58 | - obstacle_right_pos 59 | - front_wall_cf 60 | - left_wall_cf 61 | - right_wall_cf 62 | - square_table_top_cf 63 | - square_table_leg4_cf 64 | - eef_lf_pos 65 | - eef_rf_pos 66 | - dq 67 | - target_xy 68 | 69 | # set to True if you use camera sensors in the environment 70 | enableCameraSensors: False 71 | 72 | sim: 73 | dt: 0.01667 # 1/60 74 | substeps: 2 75 | up_axis: "z" 76 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 77 | gravity: [0.0, 0.0, -9.81] 78 | physx: 79 | num_threads: ${....num_threads} 80 | solver_type: ${....solver_type} 81 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 82 | num_position_iterations: 8 83 | num_velocity_iterations: 1 84 | contact_offset: 0.005 85 | rest_offset: 0.0 86 | bounce_threshold_velocity: 0.2 87 | max_depenetration_velocity: 1000.0 88 | default_buffer_size_multiplier: 5.0 89 | max_gpu_contact_pairs: 1048576 # 1024*1024 90 | num_subscenes: ${....num_subscenes} 91 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
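# The randomization_params below follow the IsaacGymEnvs domain-randomization
# schema (semantics assumed from that codebase): `operation` applies the sample
# additively or as a scale factor, `distribution` draws from uniform/loguniform
# over `range`, `setup_only: True` randomizes once at environment creation
# instead of on every reset, `num_buckets` quantizes samples to bound the
# number of distinct physics materials, and a linear `schedule` ramps the
# randomization in over `schedule_steps` simulation steps.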
92 | 93 | task: 94 | randomize: True 95 | randomization_params: 96 | frequency: 1 97 | sim_params: 98 | gravity: 99 | range: [ 0, 0.4 ] 100 | operation: "additive" 101 | distribution: "uniform" 102 | schedule: "linear" 103 | schedule_steps: 100000000 104 | actor_params: 105 | franka: 106 | color: True 107 | rigid_body_properties: 108 | mass: 109 | range: [0.5, 1.5] 110 | operation: "scaling" 111 | distribution: "uniform" 112 | setup_only: True 113 | schedule: "linear" 114 | schedule_steps: 100000000 115 | rigid_shape_properties: 116 | friction: 117 | num_buckets: 250 118 | range: [ 0.7, 1.3 ] 119 | operation: "scaling" 120 | distribution: "uniform" 121 | schedule: "linear" 122 | schedule_steps: 100000000 123 | dof_properties: 124 | lower: 125 | range: [ 1.0, 1.010050167084168 ] 126 | operation: "scaling" 127 | distribution: "loguniform" 128 | schedule: "linear" 129 | schedule_steps: 100000000 130 | upper: 131 | range: [ 1.0, 1.010050167084168 ] 132 | operation: "scaling" 133 | distribution: "loguniform" 134 | schedule: "linear" 135 | schedule_steps: 100000000 136 | stiffness: 137 | range: [ 1.0, 1.010050167084168 ] 138 | operation: "scaling" 139 | distribution: "loguniform" 140 | schedule: "linear" 141 | schedule_steps: 100000000 142 | damping: 143 | range: [ 1.0, 1.010050167084168 ] 144 | operation: "scaling" 145 | distribution: "loguniform" 146 | schedule: "linear" 147 | schedule_steps: 100000000 148 | table: 149 | color: True 150 | rigid_shape_properties: 151 | friction: 152 | num_buckets: 250 153 | range: [ 0.5, 1.5 ] 154 | operation: "scaling" 155 | distribution: "uniform" 156 | schedule: "linear" 157 | schedule_steps: 100000000 158 | square_table_leg4: 159 | color: True 160 | rigid_body_properties: 161 | mass: 162 | range: [ 0.5, 1.5 ] 163 | operation: "scaling" 164 | distribution: "uniform" 165 | setup_only: True 166 | schedule: "linear" 167 | schedule_steps: 100000000 168 | rigid_shape_properties: 169 | friction: 170 | num_buckets: 250 171 | range: [ 0.5, 1.5 ] 172 | operation: "scaling" 173 | distribution: "uniform" 174 | schedule: "linear" 175 | schedule_steps: 100000000 176 | rolling_friction: 177 | num_buckets: 250 178 | range: [ 0.5, 1.5 ] 179 | operation: "scaling" 180 | distribution: "uniform" 181 | schedule: "linear" 182 | schedule_steps: 100000000 183 | torsion_friction: 184 | num_buckets: 250 185 | range: [ 0.5, 1.5 ] 186 | operation: "scaling" 187 | distribution: "uniform" 188 | schedule: "linear" 189 | schedule_steps: 100000000 190 | restitution: 191 | range: [0.0, 1.0] 192 | operation: "additive" 193 | distribution: "uniform" 194 | schedule: "linear" 195 | schedule_steps: 100000000 196 | compliance: 197 | range: [0.0, 1.0] 198 | operation: "additive" 199 | distribution: "uniform" 200 | schedule: "linear" 201 | schedule_steps: 100000000 202 | square_table_top: 203 | color: True 204 | rigid_body_properties: 205 | mass: 206 | range: [ 0.5, 1.5 ] 207 | operation: "scaling" 208 | distribution: "uniform" 209 | setup_only: True 210 | schedule: "linear" 211 | schedule_steps: 100000000 212 | rigid_shape_properties: 213 | friction: 214 | num_buckets: 250 215 | range: [ 0.5, 1.5 ] 216 | operation: "scaling" 217 | distribution: "uniform" 218 | schedule: "linear" 219 | schedule_steps: 100000000 220 | rolling_friction: 221 | num_buckets: 250 222 | range: [ 0.5, 1.5 ] 223 | operation: "scaling" 224 | distribution: "uniform" 225 | schedule: "linear" 226 | schedule_steps: 100000000 227 | torsion_friction: 228 | num_buckets: 250 229 | range: [ 0.5, 1.5 ] 230 | operation: "scaling" 231 | 
distribution: "uniform" 232 | schedule: "linear" 233 | schedule_steps: 100000000 234 | restitution: 235 | range: [0.0, 1.0] 236 | operation: "additive" 237 | distribution: "uniform" 238 | schedule: "linear" 239 | schedule_steps: 100000000 240 | compliance: 241 | range: [0.0, 1.0] 242 | operation: "additive" 243 | distribution: "uniform" 244 | schedule: "linear" 245 | schedule_steps: 100000000 -------------------------------------------------------------------------------- /main/cfg/task/InsertSinglePCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: InsertSinglePCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:2048,${...num_envs}} 10 | episodeLength: 100 11 | 12 | furniture: "table_with_one_leg" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.25 21 | 22 | distanceReward: 0.1 23 | successReward: 100.0 24 | 25 | aggregateMode: 3 26 | 27 | actionScale: 1.0 28 | useQuatRot: false 29 | 30 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 31 | 32 | pcAugmentation: 33 | enabled: true 34 | applyP: 0.4 35 | randomTransLow: [-0.04, -0.04, -0.04] 36 | randomTransHigh: [0.04, 0.04, 0.04] 37 | 38 | jitterRatio: 0.1 39 | jitterSigma: 0.01 40 | jitterHigh: 0.015 41 | jitterLow: -0.015 42 | 43 | propObsDim: 29 44 | obsKeys: 45 | - q 46 | - cos_q 47 | - sin_q 48 | - eef_pos 49 | - eef_quat 50 | - gripper_width 51 | 52 | privilegedObsDim: 76 53 | privilegedObsKeys: 54 | - square_table_top_pos 55 | - square_table_top_rot 56 | - square_table_top_vel 57 | - square_table_leg4_pos 58 | - square_table_leg4_rot 59 | - square_table_leg4_vel 60 | - eef_vel 61 | - ftip_center_pos 62 | - obstacle_front_pos 63 | - obstacle_left_pos 64 | - obstacle_right_pos 65 | - front_wall_cf 66 | - left_wall_cf 67 | - right_wall_cf 68 | - square_table_top_cf 69 | - square_table_leg4_cf 70 | - eef_lf_pos 71 | - eef_rf_pos 72 | - dq 73 | - target_xy 74 | 75 | # set to True if you use camera sensors in the environment 76 | enableCameraSensors: False 77 | 78 | sim: 79 | dt: 0.01667 # 1/60 80 | substeps: 2 81 | up_axis: "z" 82 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 83 | gravity: [0.0, 0.0, -9.81] 84 | physx: 85 | num_threads: ${....num_threads} 86 | solver_type: ${....solver_type} 87 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 88 | num_position_iterations: 8 89 | num_velocity_iterations: 1 90 | contact_offset: 0.005 91 | rest_offset: 0.0 92 | bounce_threshold_velocity: 0.2 93 | max_depenetration_velocity: 1000.0 94 | default_buffer_size_multiplier: 5.0 95 | max_gpu_contact_pairs: 1048576 # 1024*1024 96 | num_subscenes: ${....num_subscenes} 97 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 98 | 99 | task: 100 | randomize: False 101 | -------------------------------------------------------------------------------- /main/cfg/task/LiftLeanedLegPCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: LiftLeanedLegPCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 
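# PCD task variants expose point-cloud observations on top of the state-based
# tasks: pcdN (768) is the number of points sampled per cloud and is consumed
# by the distillation pipeline via ${task.env.pcdN} in
# main/cfg/distillation_config.yaml; pcdMaskRatio: null presumably disables
# random point masking.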
8 | env: 9 | numEnvs: ${resolve_default:768,${...num_envs}} 10 | episodeLength: 100 11 | 12 | furniture: "square_table" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.15 21 | 22 | successWeight: 1 23 | targetLiftHeight: 0.17 24 | successEEFTiltThreshold: 5 25 | 26 | aggregateMode: 3 27 | 28 | actionScale: 1.0 29 | useQuatRot: false 30 | 31 | frankaDefaultDofPos: [0.12162008114028396, -0.19826458111314524, -0.01990020486871322, -2.4732269941140346, -0.01307073642274261, 2.30396583422025, 0.8480939705504309, 0.035, 0.035] 32 | 33 | pcAugmentation: 34 | enabled: true 35 | applyP: 0.4 36 | randomTransLow: [-0.04, -0.04, -0.04] 37 | randomTransHigh: [0.04, 0.04, 0.04] 38 | 39 | jitterRatio: 0.1 40 | jitterSigma: 0.01 41 | jitterHigh: 0.015 42 | jitterLow: -0.015 43 | 44 | propObsDim: 29 45 | obsKeys: 46 | - q 47 | - cos_q 48 | - sin_q 49 | - eef_pos 50 | - eef_quat 51 | - gripper_width 52 | 53 | privilegedObsDim: 136 54 | privilegedObsKeys: 55 | - square_table_top_pos 56 | - square_table_top_rot 57 | - square_table_top_vel 58 | - square_table_leg1_pos 59 | - square_table_leg1_rot 60 | - square_table_leg1_vel 61 | - square_table_leg2_pos 62 | - square_table_leg2_rot 63 | - square_table_leg2_vel 64 | - square_table_leg3_pos 65 | - square_table_leg3_rot 66 | - square_table_leg3_vel 67 | - square_table_leg4_pos 68 | - square_table_leg4_rot 69 | - square_table_leg4_vel 70 | - obstacle_front_pos 71 | - obstacle_left_pos 72 | - obstacle_right_pos 73 | - eef_vel 74 | - eef_lf_pos 75 | - eef_rf_pos 76 | - q 77 | - cos_q 78 | - sin_q 79 | - dq 80 | - q_gripper 81 | - front_wall_cf 82 | - left_wall_cf 83 | - right_wall_cf 84 | - square_table_leg4_cf 85 | 86 | # set to True if you use camera sensors in the environment 87 | enableCameraSensors: False 88 | 89 | sim: 90 | dt: 0.01667 # 1/60 91 | substeps: 2 92 | up_axis: "z" 93 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 94 | gravity: [0.0, 0.0, -9.81] 95 | physx: 96 | num_threads: ${....num_threads} 97 | solver_type: ${....solver_type} 98 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 99 | num_position_iterations: 8 100 | num_velocity_iterations: 1 101 | contact_offset: 0.005 102 | rest_offset: 0.0 103 | bounce_threshold_velocity: 0.2 104 | max_depenetration_velocity: 1000.0 105 | default_buffer_size_multiplier: 5.0 106 | max_gpu_contact_pairs: 1048576 # 1024*1024 107 | num_subscenes: ${....num_subscenes} 108 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 109 | 110 | task: 111 | randomize: False 112 | -------------------------------------------------------------------------------- /main/cfg/task/ReachAndGraspFullPCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ReachAndGraspFullPCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:768,${...num_envs}} 10 | episodeLength: 800 11 | 12 | furniture: "square_table_patch_fix" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.25 21 | 22 | successWeight: 1 23 | targetLiftHeight: 0.05 24 | 25 | selectedLegIdx: ??? 
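# Mandatory, as in InsertFullPCD: presumably the index of the leg to reach for
# and grasp; pass task.env.selectedLegIdx=<idx> at launch.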
26 | 27 | aggregateMode: 3 28 | 29 | actionScale: 1.0 30 | useQuatRot: false 31 | 32 | frankaDefaultDofPos: [0.12162008114028396, -0.19826458111314524, -0.01990020486871322, -2.4732269941140346, -0.01307073642274261, 2.30396583422025, 0.8480939705504309, 0.035, 0.035] 33 | 34 | pcAugmentation: 35 | enabled: true 36 | applyP: 0.4 37 | randomTransLow: [-0.04, -0.04, -0.04] 38 | randomTransHigh: [0.04, 0.04, 0.04] 39 | 40 | jitterRatio: 0.1 41 | jitterSigma: 0.01 42 | jitterHigh: 0.015 43 | jitterLow: -0.015 44 | 45 | propObsDim: 29 46 | obsKeys: 47 | - q 48 | - cos_q 49 | - sin_q 50 | - eef_pos 51 | - eef_quat 52 | - gripper_width 53 | 54 | privilegedObsDim: 136 55 | privilegedObsKeys: 56 | - square_table_top_pos 57 | - square_table_top_rot 58 | - square_table_top_vel 59 | - square_table_leg1_pos 60 | - square_table_leg1_rot 61 | - square_table_leg1_vel 62 | - square_table_leg2_pos 63 | - square_table_leg2_rot 64 | - square_table_leg2_vel 65 | - square_table_leg3_pos 66 | - square_table_leg3_rot 67 | - square_table_leg3_vel 68 | - leg_pos 69 | - leg_rot 70 | - leg_vel 71 | - obstacle_front_pos 72 | - obstacle_left_pos 73 | - obstacle_right_pos 74 | - eef_vel 75 | - eef_lf_pos 76 | - eef_rf_pos 77 | - q 78 | - cos_q 79 | - sin_q 80 | - dq 81 | - q_gripper 82 | - front_wall_cf 83 | - left_wall_cf 84 | - right_wall_cf 85 | - leg_cf 86 | 87 | # set to True if you use camera sensors in the environment 88 | enableCameraSensors: False 89 | 90 | sim: 91 | dt: 0.01667 # 1/60 92 | substeps: 2 93 | up_axis: "z" 94 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 95 | gravity: [0.0, 0.0, -9.81] 96 | physx: 97 | num_threads: ${....num_threads} 98 | solver_type: ${....solver_type} 99 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 100 | num_position_iterations: 8 101 | num_velocity_iterations: 1 102 | contact_offset: 0.005 103 | rest_offset: 0.0 104 | bounce_threshold_velocity: 0.2 105 | max_depenetration_velocity: 1000.0 106 | default_buffer_size_multiplier: 5.0 107 | max_gpu_contact_pairs: 1048576 # 1024*1024 108 | num_subscenes: ${....num_subscenes} 109 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 110 | 111 | task: 112 | randomize: False 113 | -------------------------------------------------------------------------------- /main/cfg/task/ReachAndGraspSingle.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ReachAndGraspSingle 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 
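# Per the find_rl_train_config resolver in main/cfg/config.yaml, this task is
# presumably paired by name with main/cfg/rl_train/ReachAndGraspSinglePPO.yaml
# for PPO training.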
8 | env: 9 | numEnvs: ${resolve_default:8192,${...num_envs}} 10 | episodeLength: 50 11 | 12 | clipObservations: 5.0 13 | clipActions: 1.0 14 | 15 | furniture: just_one_leg 16 | 17 | frankaDofNoise: 0.25 18 | 19 | targetLiftHeight: 0.05 20 | distanceReward: 0.1 21 | liftReward: 1.0 22 | successReward: 200.0 23 | 24 | aggregateMode: 3 25 | 26 | actionScale: 1.0 27 | useQuatRot: false 28 | 29 | frankaDefaultDofPos: [0.12162008114028396, -0.19826458111314524, -0.01990020486871322, -2.4732269941140346, -0.01307073642274261, 2.30396583422025, 0.8480939705504309, 0.035, 0.035] 30 | 31 | # for distillation 32 | propDumpInfo: 33 | q: 7 34 | eef_pos: 3 35 | eef_quat: 4 36 | gripper_width: 1 37 | 38 | propObsDim: 29 39 | obsKeys: 40 | - q 41 | - cos_q 42 | - sin_q 43 | - eef_pos 44 | - eef_quat 45 | - gripper_width 46 | 47 | privilegedObsDim: 58 48 | privilegedObsKeys: 49 | - leg_pos 50 | - leg_rot 51 | - leg_vel 52 | - eef_vel 53 | - ftip_center_pos 54 | - obstacle_front_pos 55 | - obstacle_left_pos 56 | - obstacle_right_pos 57 | - front_wall_cf 58 | - left_wall_cf 59 | - right_wall_cf 60 | - leg_cf 61 | - eef_lf_pos 62 | - eef_rf_pos 63 | - dq 64 | 65 | # set to True if you use camera sensors in the environment 66 | enableCameraSensors: False 67 | 68 | sim: 69 | dt: 0.01667 # 1/60 70 | substeps: 2 71 | up_axis: "z" 72 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 73 | gravity: [0.0, 0.0, -9.81] 74 | physx: 75 | num_threads: ${....num_threads} 76 | solver_type: ${....solver_type} 77 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 78 | num_position_iterations: 8 79 | num_velocity_iterations: 1 80 | contact_offset: 0.005 81 | rest_offset: 0.0 82 | bounce_threshold_velocity: 0.2 83 | max_depenetration_velocity: 1000.0 84 | default_buffer_size_multiplier: 5.0 85 | max_gpu_contact_pairs: 1048576 # 1024*1024 86 | num_subscenes: ${....num_subscenes} 87 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
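# NOTE (annotation): the randomization block below follows the IsaacGymEnvs schema:
# each property takes a `range`, an `operation` ("scaling" multiplies the asset default,
# "additive" offsets it), a sampling `distribution`, and a "linear" `schedule` that
# ramps the perturbation in over `schedule_steps` simulation steps. `num_buckets`
# discretizes friction samples to bound the number of distinct PhysX materials, and the
# loguniform range [1.0, 1.010050167084168] is [e^0, e^0.01], i.e. at most ~+1%
# multiplicative noise on the DOF limits, stiffness, and damping.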
88 | 89 | task: 90 | randomize: True 91 | randomization_params: 92 | frequency: 1 93 | sim_params: 94 | gravity: 95 | range: [ 0, 0.4 ] 96 | operation: "additive" 97 | distribution: "uniform" 98 | schedule: "linear" 99 | schedule_steps: 100000000 100 | actor_params: 101 | franka: 102 | color: True 103 | rigid_body_properties: 104 | mass: 105 | range: [0.5, 1.5] 106 | operation: "scaling" 107 | distribution: "uniform" 108 | setup_only: True 109 | schedule: "linear" 110 | schedule_steps: 100000000 111 | rigid_shape_properties: 112 | friction: 113 | num_buckets: 250 114 | range: [ 0.7, 1.3 ] 115 | operation: "scaling" 116 | distribution: "uniform" 117 | schedule: "linear" 118 | schedule_steps: 100000000 119 | dof_properties: 120 | lower: 121 | range: [ 1.0, 1.010050167084168 ] 122 | operation: "scaling" 123 | distribution: "loguniform" 124 | schedule: "linear" 125 | schedule_steps: 100000000 126 | upper: 127 | range: [ 1.0, 1.010050167084168 ] 128 | operation: "scaling" 129 | distribution: "loguniform" 130 | schedule: "linear" 131 | schedule_steps: 100000000 132 | stiffness: 133 | range: [ 1.0, 1.010050167084168 ] 134 | operation: "scaling" 135 | distribution: "loguniform" 136 | schedule: "linear" 137 | schedule_steps: 100000000 138 | damping: 139 | range: [ 1.0, 1.010050167084168 ] 140 | operation: "scaling" 141 | distribution: "loguniform" 142 | schedule: "linear" 143 | schedule_steps: 100000000 144 | table: 145 | color: True 146 | rigid_shape_properties: 147 | friction: 148 | num_buckets: 250 149 | range: [ 0.5, 1.5 ] 150 | operation: "scaling" 151 | distribution: "uniform" 152 | schedule: "linear" 153 | schedule_steps: 100000000 154 | leg: 155 | color: True 156 | scale: 157 | range: [0.9, 1.1] 158 | operation: "scaling" 159 | distribution: "uniform" 160 | setup_only: True 161 | schedule: "linear" 162 | schedule_steps: 100000000 163 | rigid_body_properties: 164 | mass: 165 | range: [ 0.5, 1.5 ] 166 | operation: "scaling" 167 | distribution: "uniform" 168 | setup_only: True 169 | schedule: "linear" 170 | schedule_steps: 100000000 171 | rigid_shape_properties: 172 | friction: 173 | num_buckets: 250 174 | range: [ 0.5, 1.5 ] 175 | operation: "scaling" 176 | distribution: "uniform" 177 | schedule: "linear" 178 | schedule_steps: 100000000 179 | rolling_friction: 180 | num_buckets: 250 181 | range: [ 0.5, 1.5 ] 182 | operation: "scaling" 183 | distribution: "uniform" 184 | schedule: "linear" 185 | schedule_steps: 100000000 186 | torsion_friction: 187 | num_buckets: 250 188 | range: [ 0.5, 1.5 ] 189 | operation: "scaling" 190 | distribution: "uniform" 191 | schedule: "linear" 192 | schedule_steps: 100000000 193 | restitution: 194 | range: [0.0, 1.0] 195 | operation: "additive" 196 | distribution: "uniform" 197 | schedule: "linear" 198 | schedule_steps: 100000000 199 | compliance: 200 | range: [0.0, 1.0] 201 | operation: "additive" 202 | distribution: "uniform" 203 | schedule: "linear" 204 | schedule_steps: 100000000 205 | -------------------------------------------------------------------------------- /main/cfg/task/ReachAndGraspSinglePCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ReachAndGraspSinglePCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 
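# NOTE (annotation, interpretation of key names only): in the `pcAugmentation` block
# below, `applyP` reads as the per-sample probability of applying the random translation
# drawn from [randomTransLow, randomTransHigh], and `jitterRatio`/`jitterSigma` as the
# fraction of points given Gaussian jitter and its scale, clipped to
# [jitterLow, jitterHigh]; the implementing environment code lives outside this repo
# (in transic_envs), so treat these readings as unverified.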
8 | env: 9 | numEnvs: ${resolve_default:8192,${...num_envs}} 10 | episodeLength: 100 11 | 12 | furniture: "just_one_leg" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.25 21 | 22 | targetLiftHeight: 0.05 23 | distanceReward: 0.1 24 | liftReward: 1.0 25 | successReward: 200.0 26 | 27 | aggregateMode: 3 28 | 29 | actionScale: 1.0 30 | useQuatRot: false 31 | 32 | frankaDefaultDofPos: [0.12162008114028396, -0.19826458111314524, -0.01990020486871322, -2.4732269941140346, -0.01307073642274261, 2.30396583422025, 0.8480939705504309, 0.035, 0.035] 33 | 34 | pcAugmentation: 35 | enabled: true 36 | applyP: 0.4 37 | randomTransLow: [-0.04, -0.04, -0.04] 38 | randomTransHigh: [0.04, 0.04, 0.04] 39 | 40 | jitterRatio: 0.1 41 | jitterSigma: 0.01 42 | jitterHigh: 0.015 43 | jitterLow: -0.015 44 | 45 | propObsDim: 29 46 | obsKeys: 47 | - q 48 | - cos_q 49 | - sin_q 50 | - eef_pos 51 | - eef_quat 52 | - gripper_width 53 | 54 | privilegedObsDim: 58 55 | privilegedObsKeys: 56 | - leg_pos 57 | - leg_rot 58 | - leg_vel 59 | - eef_vel 60 | - ftip_center_pos 61 | - obstacle_front_pos 62 | - obstacle_left_pos 63 | - obstacle_right_pos 64 | - front_wall_cf 65 | - left_wall_cf 66 | - right_wall_cf 67 | - leg_cf 68 | - eef_lf_pos 69 | - eef_rf_pos 70 | - dq 71 | 72 | # set to True if you use camera sensors in the environment 73 | enableCameraSensors: False 74 | 75 | sim: 76 | dt: 0.01667 # 1/60 77 | substeps: 2 78 | up_axis: "z" 79 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 80 | gravity: [0.0, 0.0, -9.81] 81 | physx: 82 | num_threads: ${....num_threads} 83 | solver_type: ${....solver_type} 84 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 85 | num_position_iterations: 8 86 | num_velocity_iterations: 1 87 | contact_offset: 0.005 88 | rest_offset: 0.0 89 | bounce_threshold_velocity: 0.2 90 | max_depenetration_velocity: 1000.0 91 | default_buffer_size_multiplier: 5.0 92 | max_gpu_contact_pairs: 1048576 # 1024*1024 93 | num_subscenes: ${....num_subscenes} 94 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 95 | 96 | task: 97 | randomize: False 98 | -------------------------------------------------------------------------------- /main/cfg/task/ScrewFull.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ScrewFull 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 
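# NOTE (annotation): per the `find_rl_train_config` resolver in transic/__init__.py,
# a task name maps to its PPO config by stripping any "PCD" suffix and appending "PPO",
# so ScrewFull and ScrewFullPCD both resolve to the ScrewFullPPO training config.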
8 | env: 9 | numEnvs: ${resolve_default:768,${...num_envs}} 10 | episodeLength: 200 11 | 12 | furniture: "square_table" 13 | 14 | clipObservations: 5.0 15 | clipActions: 1.0 16 | 17 | frankaDofNoise: 0.25 18 | 19 | screwReward: 0.1 20 | eefDeviatePenalty: 1e-2 21 | successReward: 100.0 22 | failurePenalty: 0.0 23 | initialQ7NoiseLevel: 0.5 # +- 50% noise range 24 | initialQ1toQ6NoiseLevel: 0.02 # +- 2% noise range 25 | 26 | aggregateMode: 3 27 | 28 | actionScale: 1.0 29 | useQuatRot: false 30 | 31 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 32 | 33 | # for distillation 34 | propDumpInfo: 35 | q: 7 36 | eef_pos: 3 37 | eef_quat: 4 38 | gripper_width: 1 39 | 40 | propObsDim: 29 41 | obsKeys: 42 | - q 43 | - cos_q 44 | - sin_q 45 | - eef_pos 46 | - eef_quat 47 | - gripper_width 48 | 49 | privilegedObsDim: 74 50 | privilegedObsKeys: 51 | - square_table_top_pos 52 | - square_table_top_rot 53 | - square_table_top_vel 54 | - square_table_leg4_pos 55 | - square_table_leg4_rot 56 | - square_table_leg4_vel 57 | - eef_vel 58 | - ftip_center_pos 59 | - obstacle_front_pos 60 | - obstacle_left_pos 61 | - obstacle_right_pos 62 | - front_wall_cf 63 | - left_wall_cf 64 | - right_wall_cf 65 | - square_table_top_cf 66 | - square_table_leg4_cf 67 | - eef_lf_pos 68 | - eef_rf_pos 69 | - dq 70 | 71 | # set to True if you use camera sensors in the environment 72 | enableCameraSensors: False 73 | 74 | sim: 75 | dt: 0.01667 # 1/60 76 | substeps: 2 77 | up_axis: "z" 78 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 79 | gravity: [0.0, 0.0, -9.81] 80 | physx: 81 | num_threads: ${....num_threads} 82 | solver_type: ${....solver_type} 83 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 84 | num_position_iterations: 8 85 | num_velocity_iterations: 1 86 | contact_offset: 0.005 87 | rest_offset: 0.0 88 | bounce_threshold_velocity: 0.2 89 | max_depenetration_velocity: 1000.0 90 | default_buffer_size_multiplier: 5.0 91 | max_gpu_contact_pairs: 1048576 # 1024*1024 92 | num_subscenes: ${....num_subscenes} 93 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
94 | 95 | task: 96 | randomize: True 97 | randomization_params: 98 | frequency: 1 99 | sim_params: 100 | gravity: 101 | range: [ 0, 0.4 ] 102 | operation: "additive" 103 | distribution: "uniform" 104 | schedule: "linear" 105 | schedule_steps: 100000000 106 | actor_params: 107 | franka: 108 | color: True 109 | rigid_body_properties: 110 | mass: 111 | range: [0.5, 1.5] 112 | operation: "scaling" 113 | distribution: "uniform" 114 | setup_only: True 115 | schedule: "linear" 116 | schedule_steps: 100000000 117 | rigid_shape_properties: 118 | friction: 119 | num_buckets: 250 120 | range: [ 0.7, 1.3 ] 121 | operation: "scaling" 122 | distribution: "uniform" 123 | schedule: "linear" 124 | schedule_steps: 100000000 125 | dof_properties: 126 | lower: 127 | range: [ 1.0, 1.010050167084168 ] 128 | operation: "scaling" 129 | distribution: "loguniform" 130 | schedule: "linear" 131 | schedule_steps: 100000000 132 | upper: 133 | range: [ 1.0, 1.010050167084168 ] 134 | operation: "scaling" 135 | distribution: "loguniform" 136 | schedule: "linear" 137 | schedule_steps: 100000000 138 | stiffness: 139 | range: [ 1.0, 1.010050167084168 ] 140 | operation: "scaling" 141 | distribution: "loguniform" 142 | schedule: "linear" 143 | schedule_steps: 100000000 144 | damping: 145 | range: [ 1.0, 1.010050167084168 ] 146 | operation: "scaling" 147 | distribution: "loguniform" 148 | schedule: "linear" 149 | schedule_steps: 100000000 150 | table: 151 | color: True 152 | rigid_shape_properties: 153 | friction: 154 | num_buckets: 250 155 | range: [ 0.5, 1.5 ] 156 | operation: "scaling" 157 | distribution: "uniform" 158 | schedule: "linear" 159 | schedule_steps: 100000000 160 | square_table_leg4: 161 | color: True 162 | rigid_body_properties: 163 | mass: 164 | range: [ 0.5, 1.5 ] 165 | operation: "scaling" 166 | distribution: "uniform" 167 | setup_only: True 168 | schedule: "linear" 169 | schedule_steps: 100000000 170 | rigid_shape_properties: 171 | friction: 172 | num_buckets: 250 173 | range: [ 0.5, 1.5 ] 174 | operation: "scaling" 175 | distribution: "uniform" 176 | schedule: "linear" 177 | schedule_steps: 100000000 178 | rolling_friction: 179 | num_buckets: 250 180 | range: [ 0.5, 1.5 ] 181 | operation: "scaling" 182 | distribution: "uniform" 183 | schedule: "linear" 184 | schedule_steps: 100000000 185 | torsion_friction: 186 | num_buckets: 250 187 | range: [ 0.5, 1.5 ] 188 | operation: "scaling" 189 | distribution: "uniform" 190 | schedule: "linear" 191 | schedule_steps: 100000000 192 | restitution: 193 | range: [0.0, 1.0] 194 | operation: "additive" 195 | distribution: "uniform" 196 | schedule: "linear" 197 | schedule_steps: 100000000 198 | compliance: 199 | range: [0.0, 1.0] 200 | operation: "additive" 201 | distribution: "uniform" 202 | schedule: "linear" 203 | schedule_steps: 100000000 204 | square_table_top: 205 | color: True 206 | rigid_body_properties: 207 | mass: 208 | range: [ 0.5, 1.5 ] 209 | operation: "scaling" 210 | distribution: "uniform" 211 | setup_only: True 212 | schedule: "linear" 213 | schedule_steps: 100000000 214 | rigid_shape_properties: 215 | friction: 216 | num_buckets: 250 217 | range: [ 0.5, 1.5 ] 218 | operation: "scaling" 219 | distribution: "uniform" 220 | schedule: "linear" 221 | schedule_steps: 100000000 222 | rolling_friction: 223 | num_buckets: 250 224 | range: [ 0.5, 1.5 ] 225 | operation: "scaling" 226 | distribution: "uniform" 227 | schedule: "linear" 228 | schedule_steps: 100000000 229 | torsion_friction: 230 | num_buckets: 250 231 | range: [ 0.5, 1.5 ] 232 | operation: "scaling" 233 
| distribution: "uniform" 234 | schedule: "linear" 235 | schedule_steps: 100000000 236 | restitution: 237 | range: [0.0, 1.0] 238 | operation: "additive" 239 | distribution: "uniform" 240 | schedule: "linear" 241 | schedule_steps: 100000000 242 | compliance: 243 | range: [0.0, 1.0] 244 | operation: "additive" 245 | distribution: "uniform" 246 | schedule: "linear" 247 | schedule_steps: 100000000 -------------------------------------------------------------------------------- /main/cfg/task/ScrewFullPCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ScrewFullPCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:2048,${...num_envs}} 10 | episodeLength: 200 11 | 12 | furniture: "square_table" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.25 21 | 22 | screwReward: 0.1 23 | eefDeviatePenalty: 1e-2 24 | successReward: 100.0 25 | failurePenalty: 0.0 26 | initialQ7NoiseLevel: 0.5 # +- 50% noise range 27 | initialQ1toQ6NoiseLevel: 0.02 # +- 2% noise range 28 | 29 | aggregateMode: 3 30 | 31 | actionScale: 1.0 32 | useQuatRot: false 33 | 34 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 35 | 36 | pcAugmentation: 37 | enabled: true 38 | applyP: 0.4 39 | randomTransLow: [-0.04, -0.04, -0.04] 40 | randomTransHigh: [0.04, 0.04, 0.04] 41 | 42 | jitterRatio: 0.1 43 | jitterSigma: 0.01 44 | jitterHigh: 0.015 45 | jitterLow: -0.015 46 | 47 | propObsDim: 29 48 | obsKeys: 49 | - q 50 | - cos_q 51 | - sin_q 52 | - eef_pos 53 | - eef_quat 54 | - gripper_width 55 | 56 | privilegedObsDim: 74 57 | privilegedObsKeys: 58 | - square_table_top_pos 59 | - square_table_top_rot 60 | - square_table_top_vel 61 | - square_table_leg4_pos 62 | - square_table_leg4_rot 63 | - square_table_leg4_vel 64 | - eef_vel 65 | - ftip_center_pos 66 | - obstacle_front_pos 67 | - obstacle_left_pos 68 | - obstacle_right_pos 69 | - front_wall_cf 70 | - left_wall_cf 71 | - right_wall_cf 72 | - square_table_top_cf 73 | - square_table_leg4_cf 74 | - eef_lf_pos 75 | - eef_rf_pos 76 | - dq 77 | 78 | # set to True if you use camera sensors in the environment 79 | enableCameraSensors: False 80 | 81 | sim: 82 | dt: 0.01667 # 1/60 83 | substeps: 2 84 | up_axis: "z" 85 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 86 | gravity: [0.0, 0.0, -9.81] 87 | physx: 88 | num_threads: ${....num_threads} 89 | solver_type: ${....solver_type} 90 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 91 | num_position_iterations: 8 92 | num_velocity_iterations: 1 93 | contact_offset: 0.005 94 | rest_offset: 0.0 95 | bounce_threshold_velocity: 0.2 96 | max_depenetration_velocity: 1000.0 97 | default_buffer_size_multiplier: 5.0 98 | max_gpu_contact_pairs: 1048576 # 1024*1024 99 | num_subscenes: ${....num_subscenes} 100 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
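# NOTE (annotation): the state-based configs in this section (e.g. ScrewFull above)
# carry a `propDumpInfo` section marked "for distillation", listing proprioceptive
# fields and their dimensions to dump during teacher rollouts; the PCD variants omit
# it, consistent with their role as the student-side observation space.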
101 | 102 | task: 103 | randomize: False 104 | -------------------------------------------------------------------------------- /main/cfg/task/ScrewSingle.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ScrewSingle 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:8192,${...num_envs}} 10 | episodeLength: 200 11 | 12 | furniture: "table_with_one_leg" 13 | 14 | clipObservations: 5.0 15 | clipActions: 1.0 16 | 17 | frankaDofNoise: 0.25 18 | 19 | screwReward: 0.1 20 | eefDeviatePenalty: 1e-2 21 | successReward: 100.0 22 | failurePenalty: 0.0 23 | 24 | initialQ7NoiseLevel: 0.5 # +- 50% noise range 25 | initialQ1toQ6NoiseLevel: 0.02 # +- 2% noise range 26 | 27 | aggregateMode: 3 28 | 29 | actionScale: 1.0 30 | useQuatRot: false 31 | 32 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 33 | 34 | # for distillation 35 | propDumpInfo: 36 | q: 7 37 | eef_pos: 3 38 | eef_quat: 4 39 | gripper_width: 1 40 | 41 | propObsDim: 29 42 | obsKeys: 43 | - q 44 | - cos_q 45 | - sin_q 46 | - eef_pos 47 | - eef_quat 48 | - gripper_width 49 | 50 | privilegedObsDim: 74 51 | privilegedObsKeys: 52 | - square_table_top_pos 53 | - square_table_top_rot 54 | - square_table_top_vel 55 | - square_table_leg4_pos 56 | - square_table_leg4_rot 57 | - square_table_leg4_vel 58 | - eef_vel 59 | - ftip_center_pos 60 | - obstacle_front_pos 61 | - obstacle_left_pos 62 | - obstacle_right_pos 63 | - front_wall_cf 64 | - left_wall_cf 65 | - right_wall_cf 66 | - square_table_top_cf 67 | - square_table_leg4_cf 68 | - eef_lf_pos 69 | - eef_rf_pos 70 | - dq 71 | 72 | # set to True if you use camera sensors in the environment 73 | enableCameraSensors: False 74 | 75 | sim: 76 | dt: 0.01667 # 1/60 77 | substeps: 2 78 | up_axis: "z" 79 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 80 | gravity: [0.0, 0.0, -9.81] 81 | physx: 82 | num_threads: ${....num_threads} 83 | solver_type: ${....solver_type} 84 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 85 | num_position_iterations: 8 86 | num_velocity_iterations: 1 87 | contact_offset: 0.005 88 | rest_offset: 0.0 89 | bounce_threshold_velocity: 0.2 90 | max_depenetration_velocity: 1000.0 91 | default_buffer_size_multiplier: 5.0 92 | max_gpu_contact_pairs: 1048576 # 1024*1024 93 | num_subscenes: ${....num_subscenes} 94 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
95 | 96 | task: 97 | randomize: True 98 | randomization_params: 99 | frequency: 1 100 | sim_params: 101 | gravity: 102 | range: [ 0, 0.4 ] 103 | operation: "additive" 104 | distribution: "uniform" 105 | schedule: "linear" 106 | schedule_steps: 100000000 107 | actor_params: 108 | franka: 109 | color: True 110 | rigid_body_properties: 111 | mass: 112 | range: [0.5, 1.5] 113 | operation: "scaling" 114 | distribution: "uniform" 115 | setup_only: True 116 | schedule: "linear" 117 | schedule_steps: 100000000 118 | rigid_shape_properties: 119 | friction: 120 | num_buckets: 250 121 | range: [ 0.7, 1.3 ] 122 | operation: "scaling" 123 | distribution: "uniform" 124 | schedule: "linear" 125 | schedule_steps: 100000000 126 | dof_properties: 127 | lower: 128 | range: [ 1.0, 1.010050167084168 ] 129 | operation: "scaling" 130 | distribution: "loguniform" 131 | schedule: "linear" 132 | schedule_steps: 100000000 133 | upper: 134 | range: [ 1.0, 1.010050167084168 ] 135 | operation: "scaling" 136 | distribution: "loguniform" 137 | schedule: "linear" 138 | schedule_steps: 100000000 139 | stiffness: 140 | range: [ 1.0, 1.010050167084168 ] 141 | operation: "scaling" 142 | distribution: "loguniform" 143 | schedule: "linear" 144 | schedule_steps: 100000000 145 | damping: 146 | range: [ 1.0, 1.010050167084168 ] 147 | operation: "scaling" 148 | distribution: "loguniform" 149 | schedule: "linear" 150 | schedule_steps: 100000000 151 | table: 152 | color: True 153 | rigid_shape_properties: 154 | friction: 155 | num_buckets: 250 156 | range: [ 0.5, 1.5 ] 157 | operation: "scaling" 158 | distribution: "uniform" 159 | schedule: "linear" 160 | schedule_steps: 100000000 161 | square_table_leg4: 162 | color: True 163 | rigid_body_properties: 164 | mass: 165 | range: [ 0.5, 1.5 ] 166 | operation: "scaling" 167 | distribution: "uniform" 168 | setup_only: True 169 | schedule: "linear" 170 | schedule_steps: 100000000 171 | rigid_shape_properties: 172 | friction: 173 | num_buckets: 250 174 | range: [ 0.5, 1.5 ] 175 | operation: "scaling" 176 | distribution: "uniform" 177 | schedule: "linear" 178 | schedule_steps: 100000000 179 | rolling_friction: 180 | num_buckets: 250 181 | range: [ 0.5, 1.5 ] 182 | operation: "scaling" 183 | distribution: "uniform" 184 | schedule: "linear" 185 | schedule_steps: 100000000 186 | torsion_friction: 187 | num_buckets: 250 188 | range: [ 0.5, 1.5 ] 189 | operation: "scaling" 190 | distribution: "uniform" 191 | schedule: "linear" 192 | schedule_steps: 100000000 193 | restitution: 194 | range: [0.0, 1.0] 195 | operation: "additive" 196 | distribution: "uniform" 197 | schedule: "linear" 198 | schedule_steps: 100000000 199 | compliance: 200 | range: [0.0, 1.0] 201 | operation: "additive" 202 | distribution: "uniform" 203 | schedule: "linear" 204 | schedule_steps: 100000000 205 | square_table_top: 206 | color: True 207 | rigid_body_properties: 208 | mass: 209 | range: [ 0.5, 1.5 ] 210 | operation: "scaling" 211 | distribution: "uniform" 212 | setup_only: True 213 | schedule: "linear" 214 | schedule_steps: 100000000 215 | rigid_shape_properties: 216 | friction: 217 | num_buckets: 250 218 | range: [ 0.5, 1.5 ] 219 | operation: "scaling" 220 | distribution: "uniform" 221 | schedule: "linear" 222 | schedule_steps: 100000000 223 | rolling_friction: 224 | num_buckets: 250 225 | range: [ 0.5, 1.5 ] 226 | operation: "scaling" 227 | distribution: "uniform" 228 | schedule: "linear" 229 | schedule_steps: 100000000 230 | torsion_friction: 231 | num_buckets: 250 232 | range: [ 0.5, 1.5 ] 233 | operation: "scaling" 
234 | distribution: "uniform" 235 | schedule: "linear" 236 | schedule_steps: 100000000 237 | restitution: 238 | range: [0.0, 1.0] 239 | operation: "additive" 240 | distribution: "uniform" 241 | schedule: "linear" 242 | schedule_steps: 100000000 243 | compliance: 244 | range: [0.0, 1.0] 245 | operation: "additive" 246 | distribution: "uniform" 247 | schedule: "linear" 248 | schedule_steps: 100000000 -------------------------------------------------------------------------------- /main/cfg/task/ScrewSinglePCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ScrewSinglePCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:2048,${...num_envs}} 10 | episodeLength: 200 11 | 12 | furniture: "table_with_one_leg" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.25 21 | 22 | screwReward: 0.1 23 | eefDeviatePenalty: 1e-2 24 | successReward: 100.0 25 | failurePenalty: 0.0 26 | initialQ7NoiseLevel: 0.5 # +- 50% noise range 27 | initialQ1toQ6NoiseLevel: 0.02 # +- 2% noise range 28 | 29 | aggregateMode: 3 30 | 31 | actionScale: 1.0 32 | useQuatRot: false 33 | 34 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 35 | 36 | pcAugmentation: 37 | enabled: true 38 | applyP: 0.4 39 | randomTransLow: [-0.04, -0.04, -0.04] 40 | randomTransHigh: [0.04, 0.04, 0.04] 41 | 42 | jitterRatio: 0.1 43 | jitterSigma: 0.01 44 | jitterHigh: 0.015 45 | jitterLow: -0.015 46 | 47 | propObsDim: 29 48 | obsKeys: 49 | - q 50 | - cos_q 51 | - sin_q 52 | - eef_pos 53 | - eef_quat 54 | - gripper_width 55 | 56 | privilegedObsDim: 74 57 | privilegedObsKeys: 58 | - square_table_top_pos 59 | - square_table_top_rot 60 | - square_table_top_vel 61 | - square_table_leg4_pos 62 | - square_table_leg4_rot 63 | - square_table_leg4_vel 64 | - eef_vel 65 | - ftip_center_pos 66 | - obstacle_front_pos 67 | - obstacle_left_pos 68 | - obstacle_right_pos 69 | - front_wall_cf 70 | - left_wall_cf 71 | - right_wall_cf 72 | - square_table_top_cf 73 | - square_table_leg4_cf 74 | - eef_lf_pos 75 | - eef_rf_pos 76 | - dq 77 | 78 | # set to True if you use camera sensors in the environment 79 | enableCameraSensors: False 80 | 81 | sim: 82 | dt: 0.01667 # 1/60 83 | substeps: 2 84 | up_axis: "z" 85 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 86 | gravity: [0.0, 0.0, -9.81] 87 | physx: 88 | num_threads: ${....num_threads} 89 | solver_type: ${....solver_type} 90 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 91 | num_position_iterations: 8 92 | num_velocity_iterations: 1 93 | contact_offset: 0.005 94 | rest_offset: 0.0 95 | bounce_threshold_velocity: 0.2 96 | max_depenetration_velocity: 1000.0 97 | default_buffer_size_multiplier: 5.0 98 | max_gpu_contact_pairs: 1048576 # 1024*1024 99 | num_subscenes: ${....num_subscenes} 100 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
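# NOTE (annotation): unlike its state-based counterpart ScrewSingle above
# (`randomize: True` with full physics randomization), this PCD variant keeps physics
# randomization off; observation-side variation comes from the `pcAugmentation` block.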
101 | 102 | task: 103 | randomize: False 104 | -------------------------------------------------------------------------------- /main/cfg/task/StabilizePCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: StabilizePCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:768,${...num_envs}} 10 | episodeLength: 100 11 | 12 | furniture: "one_leg" 13 | 14 | successWeight: 10.0 15 | failureWeight: 0.0 16 | qdPenalty: 1e-3 17 | actionPenalty: 1e-5 18 | 19 | pcdMaskRatio: null 20 | pcdN: 768 21 | 22 | clipObservations: 5.0 23 | clipActions: 1.0 24 | 25 | frankaDofNoise: 0.25 26 | 27 | aggregateMode: 3 28 | 29 | actionScale: 1.0 30 | useQuatRot: false 31 | 32 | frankaDefaultDofPos: [0.12162008114028396, -0.19826458111314524, -0.01990020486871322, -2.4732269941140346, -0.01307073642274261, 2.30396583422025, 0.8480939705504309, 0.035, 0.035] 33 | 34 | pcAugmentation: 35 | enabled: true 36 | applyP: 0.4 37 | randomTransLow: [-0.04, -0.04, -0.04] 38 | randomTransHigh: [0.04, 0.04, 0.04] 39 | 40 | jitterRatio: 0.1 41 | jitterSigma: 0.01 42 | jitterHigh: 0.015 43 | jitterLow: -0.015 44 | 45 | propObsDim: 29 46 | obsKeys: 47 | - q 48 | - cos_q 49 | - sin_q 50 | - eef_pos 51 | - eef_quat 52 | - gripper_width 53 | 54 | privilegedObsDim: 145 55 | privilegedObsKeys: 56 | - square_table_top_pos 57 | - square_table_top_rot 58 | - square_table_top_vel 59 | - square_table_leg1_pos 60 | - square_table_leg1_rot 61 | - square_table_leg1_vel 62 | - square_table_leg2_pos 63 | - square_table_leg2_rot 64 | - square_table_leg2_vel 65 | - square_table_leg3_pos 66 | - square_table_leg3_rot 67 | - square_table_leg3_vel 68 | - square_table_leg4_pos 69 | - square_table_leg4_rot 70 | - square_table_leg4_vel 71 | - obstacle_front_pos 72 | - obstacle_left_pos 73 | - obstacle_right_pos 74 | - front_wall_cf 75 | - left_wall_cf 76 | - square_table_top_cf 77 | - square_table_leg1_cf 78 | - square_table_leg2_cf 79 | - square_table_leg3_cf 80 | - square_table_leg4_cf 81 | - eef_vel 82 | - eef_lf_pos 83 | - eef_rf_pos 84 | - q 85 | - cos_q 86 | - sin_q 87 | - dq 88 | - q_gripper 89 | 90 | # set to True if you use camera sensors in the environment 91 | enableCameraSensors: False 92 | 93 | sim: 94 | dt: 0.01667 # 1/60 95 | substeps: 2 96 | up_axis: "z" 97 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 98 | gravity: [0.0, 0.0, -9.81] 99 | physx: 100 | num_threads: ${....num_threads} 101 | solver_type: ${....solver_type} 102 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 103 | num_position_iterations: 8 104 | num_velocity_iterations: 1 105 | contact_offset: 0.005 106 | rest_offset: 0.0 107 | bounce_threshold_velocity: 0.2 108 | max_depenetration_velocity: 1000.0 109 | default_buffer_size_multiplier: 5.0 110 | max_gpu_contact_pairs: 1048576 # 1024*1024 111 | num_subscenes: ${....num_subscenes} 112 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
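# NOTE (annotation): Stabilize is the only task config above that sets
# `contact_collection: 1` (CC_LAST_SUBSTEP); all the others use 0 (CC_NEVER), i.e.
# no contact information is collected for reporting.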
113 | 114 | task: 115 | randomize: False 116 | -------------------------------------------------------------------------------- /main/distillation/test.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | import hydra 3 | import transic 4 | from transic.utils.utils import set_seed 5 | from transic.distillation.data.data_module import DummyDataset 6 | from transic.utils.config_utils import omegaconf_to_dict 7 | 8 | import cv2 9 | import numpy as np 10 | 11 | cv2.imshow("dummy", np.zeros((1, 1, 3), dtype=np.uint8)) 12 | cv2.waitKey(1) 13 | 14 | 15 | @hydra.main(config_name="distillation_config", config_path="../cfg", version_base="1.1") 16 | def main(cfg): 17 | cfg.seed = set_seed(cfg.seed) 18 | 19 | if cfg.test.ckpt_path is None: 20 | print( 21 | "[WARNING] No ckpt_path is provided, will test with random weights. Press enter to continue." 22 | ) 23 | input() 24 | 25 | from transic.learn.lightning import LightingTrainer 26 | 27 | trainer_ = LightingTrainer(cfg) 28 | trainer_.trainer.loggers[-1].log_hyperparams(omegaconf_to_dict(cfg)) 29 | trainer_.trainer.test( 30 | model=trainer_.module, 31 | dataloaders=DummyDataset(batch_size=1, epoch_len=1).get_dataloader(), 32 | ckpt_path=cfg.test.ckpt_path, 33 | ) 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /main/distillation/train.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | import hydra 3 | import transic 4 | from transic.utils.utils import set_seed 5 | from transic.learn.lightning import LightingTrainer 6 | from transic.utils.config_utils import omegaconf_to_dict 7 | 8 | 9 | @hydra.main(config_name="distillation_config", config_path="../cfg", version_base="1.1") 10 | def main(cfg): 11 | cfg.seed = set_seed(cfg.seed) 12 | trainer_ = LightingTrainer(cfg) 13 | trainer_.trainer.loggers[-1].log_hyperparams(omegaconf_to_dict(cfg)) 14 | trainer_.fit() 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /main/residual/train.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | import hydra 3 | import transic 4 | from transic.utils.utils import set_seed 5 | from transic.learn.lightning import LightingTrainer 6 | from transic.utils.config_utils import omegaconf_to_dict 7 | 8 | 9 | @hydra.main(config_name="residual_config", config_path="../cfg", version_base="1.1") 10 | def main(cfg): 11 | cfg.seed = set_seed(cfg.seed) 12 | trainer_ = LightingTrainer(cfg) 13 | trainer_.trainer.loggers[-1].log_hyperparams(omegaconf_to_dict(cfg)) 14 | trainer_.fit() 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /main/rl/train.py: -------------------------------------------------------------------------------- 1 | # train.py 2 | # Script to train policies in Isaac Gym 3 | # 4 | # Copyright (c) 2018-2023, NVIDIA Corporation 5 | # All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions are met: 9 | # 10 | # 1. Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | # 13 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | # 17 | # 3. Neither the name of the copyright holder nor the names of its 18 | # contributors may be used to endorse or promote products derived from 19 | # this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | 32 | import hydra 33 | 34 | from omegaconf import DictConfig, OmegaConf 35 | import transic 36 | 37 | 38 | def preprocess_train_config(cfg, config_dict): 39 | """ 40 | Adding common configuration parameters to the rl_games train config. 41 | An alternative to this is inferring them in task-specific .yaml files, but that requires repeating the same 42 | variable interpolations in each config. 43 | """ 44 | 45 | train_cfg = config_dict["params"]["config"] 46 | 47 | train_cfg["device"] = cfg.rl_device 48 | 49 | train_cfg["population_based_training"] = False 50 | train_cfg["pbt_idx"] = None 51 | 52 | train_cfg["full_experiment_name"] = cfg.get("full_experiment_name") 53 | 54 | print(f"Using rl_device: {cfg.rl_device}") 55 | print(f"Using sim_device: {cfg.sim_device}") 56 | print(train_cfg) 57 | 58 | try: 59 | model_size_multiplier = config_dict["params"]["network"]["mlp"][ 60 | "model_size_multiplier" 61 | ] 62 | if model_size_multiplier != 1: 63 | units = config_dict["params"]["network"]["mlp"]["units"] 64 | for i, u in enumerate(units): 65 | units[i] = u * model_size_multiplier 66 | print( 67 | f'Modified MLP units by x{model_size_multiplier} to {config_dict["params"]["network"]["mlp"]["units"]}' 68 | ) 69 | except KeyError: 70 | pass 71 | 72 | return config_dict 73 | 74 | 75 | @hydra.main(version_base="1.1", config_name="config", config_path="../cfg") 76 | def launch_rlg_hydra(cfg: DictConfig): 77 | import os 78 | from datetime import datetime 79 | 80 | import isaacgym 81 | from hydra.utils import to_absolute_path 82 | 83 | if cfg.display: 84 | import cv2 85 | import numpy as np 86 | 87 | cv2.imshow("dummy", np.zeros((1, 1, 3), dtype=np.uint8)) 88 | cv2.waitKey(1) 89 | 90 | import transic_envs 91 | from transic.utils.reformat import omegaconf_to_dict, print_dict 92 | from transic.utils.utils import set_np_formatting, set_seed 93 | 94 | from transic.utils.rlgames_utils import ( 95 | RLGPUAlgoObserver, 96 | MultiObserver, 97 | ComplexObsRLGPUEnv, 98 | ) 99 | from transic.utils.wandb_utils import WandbAlgoObserver 100 | from rl_games.common import env_configurations, vecenv 101 | from transic.rl.runner import Runner 102 | from transic.rl.network_builder import DictObsBuilder 103 | from transic.rl.models import ModelA2CContinuousLogStd 104 | 
from rl_games.algos_torch.model_builder import register_network, register_model 105 | from transic.utils.wandb_utils import WandbVideoCaptureWrapper 106 | 107 | register_model("my_continuous_a2c_logstd", ModelA2CContinuousLogStd) 108 | register_network("dict_obs_actor_critic", DictObsBuilder) 109 | 110 | # ensure checkpoints can be specified as relative paths 111 | if cfg.checkpoint: 112 | cfg.checkpoint = to_absolute_path(cfg.checkpoint) 113 | 114 | cfg_dict = omegaconf_to_dict(cfg) 115 | print_dict(cfg_dict) 116 | 117 | # set numpy formatting for printing only 118 | set_np_formatting() 119 | 120 | # global rank of the GPU 121 | global_rank = int(os.getenv("RANK", "0")) 122 | 123 | # sets seed. if seed is -1 will pick a random one 124 | cfg.seed = set_seed( 125 | cfg.seed, torch_deterministic=cfg.torch_deterministic, rank=global_rank 126 | ) 127 | 128 | def create_isaacgym_env(): 129 | kwargs = dict( 130 | sim_device=cfg.sim_device, 131 | rl_device=cfg.rl_device, 132 | graphics_device_id=cfg.graphics_device_id, 133 | multi_gpu=cfg.multi_gpu, 134 | cfg=cfg.task, 135 | display=cfg.display, 136 | record=cfg.capture_video, 137 | has_headless_arg=False, 138 | ) 139 | if not cfg.headless: 140 | assert ( 141 | "pcd" not in cfg.task_name.lower() 142 | ), "TODO: add GUI support for PCD tasks" 143 | if "pcd" not in cfg.task_name.lower(): 144 | kwargs["headless"] = cfg.headless 145 | kwargs["has_headless_arg"] = True 146 | envs = transic_envs.make(**kwargs) 147 | if cfg.capture_video: 148 | envs.is_vector_env = True 149 | envs = WandbVideoCaptureWrapper( 150 | envs, 151 | n_parallel_recorders=cfg.n_parallel_recorders, 152 | n_successful_videos_to_record=cfg.n_successful_videos_to_record, 153 | ) 154 | return envs 155 | 156 | env_configurations.register( 157 | "rlgpu", 158 | { 159 | "vecenv_type": "RLGPU", 160 | "env_creator": create_isaacgym_env, 161 | }, 162 | ) 163 | 164 | obs_spec = {} 165 | if "central_value_config" in cfg.rl_train.params.config: 166 | critic_net_cfg = cfg.rl_train.params.config.central_value_config.network 167 | obs_spec["states"] = { 168 | "names": list(critic_net_cfg.inputs.keys()), 169 | "concat": not critic_net_cfg.name == "complex_net", 170 | "space_name": "state_space", 171 | } 172 | 173 | vecenv.register( 174 | "RLGPU", lambda config_name, num_actors: ComplexObsRLGPUEnv(config_name) 175 | ) 176 | 177 | rlg_config_dict = omegaconf_to_dict(cfg.rl_train) 178 | rlg_config_dict = preprocess_train_config(cfg, rlg_config_dict) 179 | 180 | observers = [RLGPUAlgoObserver()] 181 | 182 | if cfg.wandb_activate: 183 | cfg.seed += global_rank 184 | if global_rank == 0: 185 | # initialize wandb only once per multi-gpu run 186 | wandb_observer = WandbAlgoObserver(cfg) 187 | observers.append(wandb_observer) 188 | 189 | def build_runner(algo_observer): 190 | runner = Runner(algo_observer) 191 | return runner 192 | 193 | # convert CLI arguments into dictionary 194 | # create runner and set the settings 195 | runner = build_runner(MultiObserver(observers)) 196 | runner.load(rlg_config_dict) 197 | runner.reset() 198 | 199 | # dump config dict 200 | if cfg.test: 201 | prefix = "dump_" if cfg.save_rollouts else "test_" 202 | experiment_dir = os.path.join( 203 | "runs", 204 | prefix 205 | + cfg.rl_train.params.config.name 206 | + "_{date:%m-%d-%H-%M-%S}".format(date=datetime.now()), 207 | ) 208 | else: 209 | experiment_dir = os.path.join( 210 | "runs", 211 | cfg.rl_train.params.config.name 212 | + "_{date:%m-%d-%H-%M-%S}".format(date=datetime.now()), 213 | ) 214 | os.makedirs(experiment_dir, 
exist_ok=True) 215 | with open(os.path.join(experiment_dir, "config.yaml"), "w") as f: 216 | f.write(OmegaConf.to_yaml(cfg)) 217 | 218 | runner.run( 219 | { 220 | "train": not cfg.test, 221 | "play": cfg.test, 222 | "checkpoint": cfg.checkpoint, 223 | "from_ckpt_epoch": cfg.from_ckpt_epoch, 224 | "sigma": cfg.sigma if cfg.sigma != "" else None, 225 | "save_rollouts": { 226 | "save_rollouts": cfg.save_rollouts, 227 | "rollout_saving_fpath": os.path.join(experiment_dir, "rollouts.hdf5"), 228 | "save_successful_rollouts_only": cfg.save_successful_rollouts_only, 229 | "num_rollouts_to_save": cfg.num_rollouts_to_save, 230 | "min_episode_length": cfg.min_episode_length, 231 | }, 232 | } 233 | ) 234 | 235 | 236 | if __name__ == "__main__": 237 | launch_rlg_hydra() 238 | -------------------------------------------------------------------------------- /media/SUSig-red.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/media/SUSig-red.png -------------------------------------------------------------------------------- /media/method_overview.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/media/method_overview.gif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | rl-games==1.6.1 2 | gym==0.23.1 3 | hydra-core 4 | h5py 5 | dm_tree 6 | einops 7 | pytorch_lightning -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import pkg_resources 4 | from setuptools import setup, find_packages 5 | 6 | PKG_NAME = "transic" 7 | VERSION = "0.0.1" 8 | EXTRAS = {} 9 | 10 | 11 | def _read_file(fname): 12 | with pathlib.Path(fname).open() as fp: 13 | return fp.read() 14 | 15 | 16 | def _read_install_requires(): 17 | with pathlib.Path("requirements.txt").open() as fp: 18 | return [ 19 | str(requirement) for requirement in pkg_resources.parse_requirements(fp) 20 | ] 21 | 22 | 23 | setup( 24 | name=PKG_NAME, 25 | version=VERSION, 26 | author="TRANSIC Developers", 27 | description="research project", 28 | long_description=_read_file("README.md"), 29 | long_description_content_type="text/markdown", 30 | keywords=["Robotics", "Reinforcement Learning", "Machine Learning"], 31 | license="Apache License, Version 2.0", 32 | packages=find_packages(include=f"{PKG_NAME}.*"), 33 | include_package_data=True, 34 | zip_safe=False, 35 | entry_points={"console_scripts": []}, 36 | install_requires=_read_install_requires(), 37 | python_requires="==3.8.*", 38 | classifiers=[ 39 | "Development Status :: 3 - Alpha", 40 | "Topic :: Scientific/Engineering :: Robotics", 41 | "Environment :: Console", 42 | "Programming Language :: Python :: 3", 43 | ], 44 | ) 45 | -------------------------------------------------------------------------------- /transic/__init__.py: -------------------------------------------------------------------------------- 1 | from omegaconf import DictConfig, OmegaConf 2 | 3 | 4 | def _is_cuda_solver(x, y): 5 | if isinstance(y, int): 6 | return y >= 0 7 | if isinstance(y, str): 8 | if "cuda" in y.lower(): 9 | return True 10 | else: 11 | return x.lower() in y.lower() 12 | 13 | 14 | 
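# NOTE (annotation): the resolvers registered below back the `${eq:...}`,
# `${contains:...}`, `${if:...}`, `${resolve_default:...}`, and
# `${find_rl_train_config:...}` interpolations used throughout the YAML configs in
# main/cfg. Registration happens at import time, which is why every entry script does
# `import transic` before Hydra composes its config. For example,
# `${contains:"cuda",${....sim_device}}` evaluates _is_cuda_solver("cuda", "cuda:0")
# and returns True.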
OmegaConf.register_new_resolver("eq", lambda x, y: x.lower() == y.lower()) 15 | OmegaConf.register_new_resolver("contains", _is_cuda_solver) 16 | OmegaConf.register_new_resolver("if", lambda pred, a, b: a if pred else b) 17 | OmegaConf.register_new_resolver( 18 | "resolve_default", lambda default, arg: default if arg == "" else arg 19 | ) 20 | OmegaConf.register_new_resolver("multiply", lambda x, y: x * y) 21 | OmegaConf.register_new_resolver("floor_divide", lambda x, y: x // y) 22 | OmegaConf.register_new_resolver( 23 | "find_rl_train_config", 24 | lambda x: x + "PPO" if x[-3:] != "PCD" else x[:-3] + "PPO", 25 | ) 26 | -------------------------------------------------------------------------------- /transic/distillation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/distillation/__init__.py -------------------------------------------------------------------------------- /transic/distillation/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_module import DistillationDataModule 2 | -------------------------------------------------------------------------------- /transic/distillation/data/collate.py: -------------------------------------------------------------------------------- 1 | from math import ceil 2 | from copy import deepcopy 3 | 4 | import numpy as np 5 | 6 | from transic.utils.array import ( 7 | any_slice, 8 | nested_np_split, 9 | get_batch_size, 10 | any_concat, 11 | any_stack, 12 | any_ones_like, 13 | any_to_torch_tensor, 14 | ) 15 | 16 | 17 | def collate_fn( 18 | sample_list, 19 | with_matched_scene: bool, 20 | ctx_len: int = 5, 21 | ): 22 | """ 23 | sample_list: List of 24 | Tuple[main_data: Dict, Tuple[Tuple[real_pcds, real_pcd_ee_masks], Tuple[sim_pcds, sim_pcd_ee_masks]]] if with_matched_scene is True 25 | main_data: Dict, if with_matched_scene is False 26 | """ 27 | if with_matched_scene: 28 | main_data = [sample[0] for sample in sample_list] # List[Dict] 29 | matched_scenes = [ 30 | sample[1] for sample in sample_list 31 | ] # List[Tuple[Tuple[real_pcds, real_pcd_ee_masks], Tuple[sim_pcds, sim_pcd_ee_masks]]] 32 | else: 33 | main_data = sample_list 34 | matched_scenes = None 35 | 36 | L_max = max(get_batch_size(sample) for sample in main_data) 37 | N_chunks = ceil(L_max / ctx_len) 38 | L_pad_max = N_chunks * ctx_len 39 | 40 | sample_structure = deepcopy(any_slice(main_data[0], np.s_[0:1])) 41 | # pad to max length in this batch 42 | processed_main_data = any_stack( 43 | [ 44 | any_concat( 45 | [ 46 | sample, 47 | ] 48 | + [any_ones_like(sample_structure)] 49 | * (L_pad_max - get_batch_size(sample)), 50 | dim=0, 51 | ) 52 | for sample in main_data 53 | ], 54 | dim=0, 55 | ) # dict of (B, L_pad_max, ...) 56 | # construct mask 57 | mask = any_stack( 58 | [ 59 | any_concat( 60 | [ 61 | np.ones((get_batch_size(sample),), dtype=bool), 62 | np.zeros((L_pad_max - get_batch_size(sample),), dtype=bool), 63 | ] 64 | ) 65 | for sample in main_data 66 | ], 67 | dim=0, 68 | ) # (B, L_pad_max) 69 | 70 | # split into chunks 71 | processed_main_data = { 72 | k: any_stack(v, dim=0) 73 | for k, v in nested_np_split(processed_main_data, N_chunks, axis=1).items() 74 | } # dict of (N_chunks, B, ctx_len, ...) 
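# NOTE (annotation): worked shape example for the padding/chunking above (values are
# illustrative, not from the original source): with ctx_len=5 and a longest episode of
# L_max=12 steps, N_chunks = ceil(12 / 5) = 3 and L_pad_max = 15; an episode of length
# 8 is padded with 7 dummy steps, and its row of `mask` is True for the first 8 entries.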
75 | mask = any_stack(np.split(mask, N_chunks, axis=1), dim=0) # (N_chunks, B, ctx_len) 76 | processed_main_data["pad_mask"] = mask 77 | 78 | # convert to tensor 79 | processed_main_data = { 80 | k: any_to_torch_tensor(v) for k, v in processed_main_data.items() 81 | } 82 | if matched_scenes is not None: 83 | matched_scenes = any_stack(matched_scenes, dim=0) 84 | matched_scenes_tensor = ( 85 | ( 86 | any_to_torch_tensor(matched_scenes[0][0]), 87 | any_to_torch_tensor(matched_scenes[0][1]), 88 | ), 89 | ( 90 | any_to_torch_tensor(matched_scenes[1][0]), 91 | any_to_torch_tensor(matched_scenes[1][1]), 92 | ), 93 | ) 94 | return processed_main_data, matched_scenes_tensor 95 | return processed_main_data 96 | -------------------------------------------------------------------------------- /transic/distillation/data/data_module.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | from functools import partial 3 | 4 | from torch.utils.data import DataLoader 5 | from pytorch_lightning import LightningDataModule 6 | 7 | from transic.distillation.data.dummy import DummyDataset 8 | from transic.distillation.data.collate import collate_fn as _collate_fn 9 | from transic.distillation.data.dataset import ( 10 | DistillationDataset, 11 | DistillationSeqDataset, 12 | ) 13 | 14 | 15 | class DistillationDataModule(LightningDataModule): 16 | def __init__( 17 | self, 18 | *, 19 | data_path: str, 20 | matched_scene_data_path: Optional[str] = None, 21 | ctx_len: int = -1, # -1 means not using the SeqDataset at all 22 | skip_first_n_steps: int, 23 | sampled_pcd_points: int, 24 | refresh_pcd_sampling_idxs_interval: float, 25 | real_pcd_x_limits: Tuple[float, float], 26 | real_pcd_y_limits: Tuple[float, float], 27 | real_pcd_z_min: float, 28 | batch_size: int, 29 | dataloader_num_workers: int, 30 | seed: Optional[int] = None, 31 | ): 32 | super().__init__() 33 | self._data_path = data_path 34 | self._matched_scene_data_path = matched_scene_data_path 35 | self._skip_first_n_steps = skip_first_n_steps 36 | self._sampled_pcd_points = sampled_pcd_points 37 | self._refresh_pcd_sampling_idxs_interval = refresh_pcd_sampling_idxs_interval 38 | self._real_pcd_x_limits = real_pcd_x_limits 39 | self._real_pcd_y_limits = real_pcd_y_limits 40 | self._real_pcd_z_min = real_pcd_z_min 41 | 42 | self._batch_size = batch_size 43 | self._dataloader_num_workers = dataloader_num_workers 44 | self._seed = seed 45 | 46 | self._ds_cls = DistillationSeqDataset if ctx_len != -1 else DistillationDataset 47 | self._collate_fn = ( 48 | partial( 49 | _collate_fn, 50 | with_matched_scene=matched_scene_data_path is not None, 51 | ctx_len=ctx_len, 52 | ) 53 | if ctx_len != -1 54 | else None 55 | ) 56 | self._train_dataset = None 57 | 58 | def setup(self, stage: str) -> None: 59 | if stage == "fit" or stage is None: 60 | self._train_dataset = self._ds_cls( 61 | fpath=self._data_path, 62 | matched_scene_fpath=self._matched_scene_data_path, 63 | sampled_pcd_points=self._sampled_pcd_points, 64 | skip_first_n_steps=self._skip_first_n_steps, 65 | refresh_pcd_sampling_idxs_interval=self._refresh_pcd_sampling_idxs_interval, 66 | real_pcd_x_limits=self._real_pcd_x_limits, 67 | real_pcd_y_limits=self._real_pcd_y_limits, 68 | real_pcd_z_min=self._real_pcd_z_min, 69 | seed=self._seed, 70 | ) 71 | 72 | def train_dataloader(self): 73 | return DataLoader( 74 | self._train_dataset, 75 | batch_size=self._batch_size, 76 | num_workers=min(self._batch_size, self._dataloader_num_workers), 77 | 
pin_memory=True, 78 | persistent_workers=True, 79 | collate_fn=self._collate_fn, 80 | ) 81 | 82 | def val_dataloader(self): 83 | return DummyDataset(batch_size=1).get_dataloader() 84 | -------------------------------------------------------------------------------- /transic/distillation/data/dummy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data import Dataset, DataLoader 3 | 4 | 5 | class DummyDataset(Dataset): 6 | """ 7 | For test_step(), simply returns None N times. 8 | test_step() can have arbitrary logic 9 | """ 10 | 11 | def __init__(self, batch_size, epoch_len=1): 12 | """ 13 | Still set batch_size because pytorch_lightning tracks it 14 | """ 15 | self.n = epoch_len 16 | self._batch_size = batch_size 17 | 18 | def __len__(self): 19 | return self.n 20 | 21 | def __getitem__(self, i): 22 | return np.zeros((self._batch_size,), dtype=bool) 23 | 24 | def get_dataloader(self) -> DataLoader: 25 | """ 26 | Our dataset directly returns batched tensors instead of single samples, 27 | so for DataLoader we don't need a real collate_fn and set batch_size=1 28 | """ 29 | return DataLoader( 30 | self, 31 | batch_size=1, 32 | num_workers=0, 33 | pin_memory=True, 34 | shuffle=False, 35 | collate_fn=_singleton_collate_fn, 36 | ) 37 | 38 | 39 | def _singleton_collate_fn(tensor_list): 40 | """ 41 | Our dataset directly returns batched tensors instead of single samples, 42 | so for DataLoader we don't need a real collate_fn. 43 | """ 44 | assert len(tensor_list) == 1, "INTERNAL: collate_fn only allows a single item" 45 | return tensor_list[0] 46 | -------------------------------------------------------------------------------- /transic/distillation/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from .pointnet_policy import PointNetPolicy 2 | from .rnn_pointnet_policy import RNNPointNetPolicy 3 | -------------------------------------------------------------------------------- /transic/distillation/policy/pointnet_policy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from transic.learn.policy import GMMHead, BasePolicy 4 | from transic.nn.features import SimpleFeatureFusion, PointNet, Identity 5 | 6 | 7 | class PointNetPolicy(BasePolicy): 8 | def __init__( 9 | self, 10 | *, 11 | point_channels: int = 3, 12 | subtract_point_mean: bool = False, 13 | add_ee_embd: bool = False, 14 | ee_embd_dim: int, 15 | pointnet_output_dim: int, 16 | pointnet_hidden_dim: int, 17 | pointnet_hidden_depth: int, 18 | pointnet_activation: str = "gelu", 19 | prop_input_dim: int, 20 | feature_fusion_hidden_depth: int = 1, 21 | feature_fusion_hidden_dim: int = 256, 22 | feature_fusion_output_dim: int = 256, 23 | feature_fusion_activation: str = "relu", 24 | feature_fusion_add_input_activation: bool = False, 25 | feature_fusion_add_output_activation: bool = False, 26 | action_dim: int, 27 | action_net_gmm_n_modes: int = 5, 28 | action_net_hidden_dim: int, 29 | action_net_hidden_depth: int, 30 | action_net_activation: str = "relu", 31 | deterministic_inference: bool = True, 32 | gmm_low_noise_eval: bool = True, 33 | ): 34 | super().__init__() 35 | 36 | self.feature_extractor = SimpleFeatureFusion( 37 | extractors={ 38 | "pcd": PointNet( 39 | n_coordinates=point_channels, 40 | add_ee_embd=add_ee_embd, 41 | ee_embd_dim=ee_embd_dim, 42 | output_dim=pointnet_output_dim, 43 | hidden_dim=pointnet_hidden_dim, 44 | 
hidden_depth=pointnet_hidden_depth, 45 | activation=pointnet_activation, 46 | subtract_mean=subtract_point_mean, 47 | ), 48 | "proprioception": Identity(prop_input_dim), 49 | }, 50 | hidden_depth=feature_fusion_hidden_depth, 51 | hidden_dim=feature_fusion_hidden_dim, 52 | output_dim=feature_fusion_output_dim, 53 | activation=feature_fusion_activation, 54 | add_input_activation=feature_fusion_add_input_activation, 55 | add_output_activation=feature_fusion_add_output_activation, 56 | ) 57 | 58 | self.action_net = GMMHead( 59 | feature_fusion_output_dim, 60 | n_modes=action_net_gmm_n_modes, 61 | action_dim=action_dim, 62 | hidden_dim=action_net_hidden_dim, 63 | hidden_depth=action_net_hidden_depth, 64 | activation=action_net_activation, 65 | low_noise_eval=gmm_low_noise_eval, 66 | ) 67 | self._deterministic_inference = deterministic_inference 68 | 69 | def forward(self, obs): 70 | return self.action_net(self.feature_extractor(obs)) 71 | 72 | @torch.no_grad() 73 | def act(self, obs, deterministic=None): 74 | dist = self.forward(obs) 75 | if deterministic is None: 76 | deterministic = self._deterministic_inference 77 | if deterministic: 78 | return dist.mode() 79 | else: 80 | return dist.sample() 81 | -------------------------------------------------------------------------------- /transic/distillation/policy/rnn_pointnet_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Optional 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from transic.learn.policy import GMMHead, BasePolicy 7 | from transic.nn.features import SimpleFeatureFusion, PointNet, Identity 8 | from transic.utils.array import get_batch_size, any_slice 9 | 10 | 11 | RNN_CLS = { 12 | "lstm": nn.LSTM, 13 | "gru": nn.GRU, 14 | } 15 | 16 | 17 | class RNNPointNetPolicy(BasePolicy): 18 | is_sequence_policy = True 19 | 20 | def __init__( 21 | self, 22 | *, 23 | point_channels: int = 3, 24 | subtract_point_mean: bool = False, 25 | add_ee_embd: bool = False, 26 | ee_embd_dim: int, 27 | pointnet_output_dim: int, 28 | pointnet_hidden_dim: int, 29 | pointnet_hidden_depth: int, 30 | pointnet_activation: str = "gelu", 31 | prop_input_dim: int, 32 | feature_fusion_hidden_depth: int = 1, 33 | feature_fusion_hidden_dim: int = 256, 34 | feature_fusion_output_dim: int = 256, 35 | feature_fusion_activation: str = "relu", 36 | feature_fusion_add_input_activation: bool = False, 37 | feature_fusion_add_output_activation: bool = False, 38 | rnn_type: Literal["lstm", "gru"], 39 | rnn_n_layers: int = 2, 40 | rnn_hidden_dim: int = 256, 41 | ctx_len: int, 42 | action_dim: int, 43 | action_net_gmm_n_modes: int = 5, 44 | action_net_hidden_dim: int, 45 | action_net_hidden_depth: int, 46 | action_net_activation: str = "relu", 47 | deterministic_inference: bool = True, 48 | gmm_low_noise_eval: bool = True, 49 | ): 50 | super().__init__() 51 | 52 | self.ctx_len = ctx_len 53 | 54 | self.feature_extractor = SimpleFeatureFusion( 55 | extractors={ 56 | "pcd": PointNet( 57 | n_coordinates=point_channels, 58 | add_ee_embd=add_ee_embd, 59 | ee_embd_dim=ee_embd_dim, 60 | output_dim=pointnet_output_dim, 61 | hidden_dim=pointnet_hidden_dim, 62 | hidden_depth=pointnet_hidden_depth, 63 | activation=pointnet_activation, 64 | subtract_mean=subtract_point_mean, 65 | ), 66 | "proprioception": Identity(prop_input_dim), 67 | }, 68 | hidden_depth=feature_fusion_hidden_depth, 69 | hidden_dim=feature_fusion_hidden_dim, 70 | output_dim=feature_fusion_output_dim, 71 | activation=feature_fusion_activation, 72 
| add_input_activation=feature_fusion_add_input_activation, 73 | add_output_activation=feature_fusion_add_output_activation, 74 | ) 75 | 76 | assert rnn_type in ["lstm", "gru"] 77 | rnn_cls = RNN_CLS[rnn_type] 78 | self.rnn = rnn_cls( 79 | input_size=feature_fusion_output_dim, 80 | hidden_size=rnn_hidden_dim, 81 | num_layers=rnn_n_layers, 82 | batch_first=True, 83 | ) 84 | 85 | self.action_net = GMMHead( 86 | rnn_hidden_dim, 87 | n_modes=action_net_gmm_n_modes, 88 | action_dim=action_dim, 89 | hidden_dim=action_net_hidden_dim, 90 | hidden_depth=action_net_hidden_depth, 91 | activation=action_net_activation, 92 | low_noise_eval=gmm_low_noise_eval, 93 | ) 94 | self._deterministic_inference = deterministic_inference 95 | 96 | def get_initial_state(self, batch_size: int, timesteps: Optional[int] = None): 97 | h_0 = torch.zeros( 98 | self.rnn.num_layers, batch_size, self.rnn.hidden_size, device=self.device 99 | ) 100 | if isinstance(self.rnn, nn.LSTM): 101 | c_0 = torch.zeros_like(h_0) 102 | return h_0, c_0 103 | return h_0 104 | 105 | def update_state(self, *, old_state, new_state, idxs): 106 | if isinstance(self.rnn, nn.LSTM): 107 | h_old, c_old = old_state 108 | h_new, c_new = new_state 109 | h_old[:, idxs] = h_new 110 | c_old[:, idxs] = c_new 111 | return h_old, c_old 112 | elif isinstance(self.rnn, nn.GRU): 113 | old_state[:, idxs] = new_state 114 | return old_state 115 | else: 116 | raise NotImplementedError(f"Unknown RNN type {type(self.rnn)}") 117 | 118 | def forward(self, obs, policy_state): 119 | """ 120 | obs: dict of (B, L, ...) 121 | rnn_state: (h_0, c_0) or h_0 122 | """ 123 | x = self.feature_extractor(obs) 124 | x, policy_state = self.rnn(x, policy_state) 125 | return self.action_net(x), policy_state 126 | 127 | @torch.no_grad() 128 | def act(self, obs, policy_state, deterministic=None): 129 | """ 130 | obs: dict of (B, L=1, ...) 
131 | rnn_state: (h_0, c_0) or h_0 132 | """ 133 | assert get_batch_size(any_slice(obs, 0), strict=True) == 1, "Use L=1 for act" 134 | dist, policy_state = self.forward(obs, policy_state) 135 | if deterministic is None: 136 | deterministic = self._deterministic_inference 137 | if deterministic: 138 | action = dist.mode() 139 | else: 140 | action = dist.sample() 141 | # action is (B, L=1, A), reduce to (B, A) 142 | action = action[:, 0] 143 | return action, policy_state 144 | -------------------------------------------------------------------------------- /transic/learn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/learn/__init__.py -------------------------------------------------------------------------------- /transic/learn/lightning.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import os 3 | import time 4 | from copy import deepcopy 5 | import pprint 6 | 7 | import sys 8 | from omegaconf import DictConfig, OmegaConf, ListConfig 9 | import pytorch_lightning as pl 10 | import pytorch_lightning.loggers as pl_loggers 11 | from pytorch_lightning.callbacks import ( 12 | Callback, 13 | ModelCheckpoint, 14 | ProgressBar, 15 | TQDMProgressBar, 16 | ) 17 | from pytorch_lightning.utilities import rank_zero_only 18 | from hydra.utils import instantiate 19 | 20 | 21 | class LightingTrainer: 22 | def __init__(self, cfg: DictConfig): 23 | cfg = deepcopy(cfg) 24 | OmegaConf.set_struct(cfg, False) 25 | self.cfg = cfg 26 | self.run_command_args = sys.argv[1:] 27 | run_name = self.generate_run_name(cfg) 28 | self.run_dir = os.path.join(cfg.exp_root_dir, run_name) 29 | rank_zero_print("Run name:", run_name, "\nExp dir:", self.run_dir) 30 | os.makedirs(self.run_dir, exist_ok=True) 31 | os.makedirs(os.path.join(self.run_dir, "tb"), exist_ok=True) 32 | os.makedirs(os.path.join(self.run_dir, "logs"), exist_ok=True) 33 | os.makedirs(os.path.join(self.run_dir, "ckpt"), exist_ok=True) 34 | OmegaConf.save(cfg, os.path.join(self.run_dir, "conf.yaml")) 35 | self.cfg = cfg 36 | self.run_name = run_name 37 | self.ckpt_cfg = cfg.trainer.pop("checkpoint") 38 | self.data_module = self.create_data_module(cfg) 39 | self._monkey_patch_add_info(self.data_module) 40 | self.trainer = self.create_trainer(cfg) 41 | self.module = self.create_module(cfg) 42 | self.module.data_module = self.data_module 43 | self._monkey_patch_add_info(self.module) 44 | 45 | def create_module(self, cfg): 46 | return instantiate(cfg.module, _recursive_=False) 47 | 48 | def create_data_module(self, cfg): 49 | return instantiate(cfg.data_module) 50 | 51 | def generate_run_name(self, cfg): 52 | return cfg.run_name + "_" + time.strftime("%Y%m%d-%H%M%S") 53 | 54 | def _monkey_patch_add_info(self, obj): 55 | """ 56 | Add useful info to module and data_module so they can access directly 57 | """ 58 | # our own info 59 | obj.run_config = self.cfg 60 | obj.run_name = self.run_name 61 | obj.run_command_args = self.run_command_args 62 | # add properties from trainer 63 | for attr in [ 64 | "global_rank", 65 | "local_rank", 66 | "world_size", 67 | "num_nodes", 68 | "num_processes", 69 | "node_rank", 70 | "num_gpus", 71 | "data_parallel_device_ids", 72 | ]: 73 | if hasattr(obj, attr): 74 | continue 75 | setattr( 76 | obj.__class__, 77 | attr, 78 | # force capture 'attr' 79 | property(lambda self, attr=attr: getattr(self.trainer, attr)), 80 | ) 
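    # A sketch of the effect of the patch above (illustrative only, not
    # executed): given `runner = LightingTrainer(cfg)`, both `runner.module`
    # and `runner.data_module` gain read-only properties such as
    #
    #   runner.module.global_rank       # -> runner.module.trainer.global_rank
    #   runner.data_module.world_size   # -> forwarded to the attached pl.Trainer
    #
    # The `attr=attr` default argument makes each lambda capture its own
    # attribute name instead of the shared loop variable.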
81 |
82 |     def create_loggers(self, cfg) -> List[pl.loggers.Logger]:
83 |         loggers = [
84 |             pl_loggers.TensorBoardLogger(self.run_dir, name="tb", version=""),
85 |             pl_loggers.CSVLogger(self.run_dir, name="logs", version=""),
86 |         ]
87 |         if cfg.use_wandb:
88 |             loggers.append(
89 |                 pl_loggers.WandbLogger(
90 |                     name=cfg.wandb_run_name, project=cfg.wandb_project, id=self.run_name
91 |                 )
92 |             )
93 |         return loggers
94 |
95 |     def create_callbacks(self) -> List[Callback]:
96 |         ModelCheckpoint.FILE_EXTENSION = ".pth"
97 |         callbacks = []
98 |         if isinstance(self.ckpt_cfg, DictConfig):
99 |             ckpt = ModelCheckpoint(
100 |                 dirpath=os.path.join(self.run_dir, "ckpt"), **self.ckpt_cfg
101 |             )
102 |             callbacks.append(ckpt)
103 |         else:
104 |             assert isinstance(self.ckpt_cfg, ListConfig)
105 |             for _cfg in self.ckpt_cfg:
106 |                 ckpt = ModelCheckpoint(
107 |                     dirpath=os.path.join(self.run_dir, "ckpt"), **_cfg
108 |                 )
109 |                 callbacks.append(ckpt)
110 |
111 |         if not any(isinstance(c, ProgressBar) for c in callbacks):
112 |             callbacks.append(TQDMProgressBar())
113 |         rank_zero_print(
114 |             "Lightning callbacks:", [c.__class__.__name__ for c in callbacks]
115 |         )
116 |         return callbacks
117 |
118 |     def create_trainer(self, cfg) -> pl.Trainer:
119 |         assert "trainer" in cfg
120 |         C = cfg.trainer
121 |         return instantiate(
122 |             C, logger=self.create_loggers(cfg), callbacks=self.create_callbacks()
123 |         )
124 |
125 |     @property
126 |     def tb_logger(self):
127 |         return self.trainer.loggers[0].experiment  # this wrapper has no `logger` itself; go through the pl.Trainer
128 |
129 |     def fit(self):
130 |         return self.trainer.fit(
131 |             self.module,
132 |             datamodule=self.data_module,
133 |             ckpt_path=None,
134 |         )
135 |
136 |
137 | def pprint_(*objs, **kwargs):
138 |     """
139 |     Use pprint to format the objects
140 |     """
141 |     print(
142 |         *[
143 |             pprint.pformat(obj, indent=2) if not isinstance(obj, str) else obj
144 |             for obj in objs
145 |         ],
146 |         **kwargs,
147 |     )
148 |
149 |
150 | @rank_zero_only
151 | def rank_zero_print(*msg, **kwargs):
152 |     pprint_(*msg, **kwargs)
153 |
-------------------------------------------------------------------------------- /transic/learn/lr_schedule.py: --------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import numpy as np
4 |
5 |
6 | def generate_cosine_schedule(
7 |     base_value,
8 |     final_value,
9 |     epochs,
10 |     steps_per_epoch,
11 |     warmup_epochs=0,
12 |     warmup_start_value=0,
13 | ) -> np.ndarray:
14 |     warmup_schedule = np.array([])
15 |     warmup_iters = int(warmup_epochs * steps_per_epoch)
16 |     if warmup_epochs > 0:
17 |         warmup_schedule = np.linspace(warmup_start_value, base_value, warmup_iters)
18 |
19 |     iters = np.arange(int(epochs * steps_per_epoch) - warmup_iters)
20 |     schedule = np.array(
21 |         [
22 |             final_value
23 |             + 0.5
24 |             * (base_value - final_value)
25 |             * (1 + math.cos(math.pi * i / (len(iters))))
26 |             for i in iters
27 |         ]
28 |     )
29 |     schedule = np.concatenate((warmup_schedule, schedule))
30 |     assert len(schedule) == int(epochs * steps_per_epoch)
31 |     return schedule
32 |
33 |
34 | class CosineScheduleFunction:
35 |     def __init__(
36 |         self,
37 |         base_value,
38 |         final_value,
39 |         epochs,
40 |         steps_per_epoch,
41 |         warmup_epochs=0,
42 |         warmup_start_value=0,
43 |     ):
44 |         """
45 |         Usage:
46 |             scheduler = torch.optim.lr_scheduler.LambdaLR(
47 |                 optimizer=optimizer, lr_lambda=CosineScheduleFunction(**kwargs)
48 |             )
49 |         or simply use CosineScheduler(**kwargs)
50 |
51 |         Args:
52 |             epochs: effective epochs for the cosine schedule, *including* warmup
53 |                 after these epochs, scheduler will output
`final_value` ever after 54 | """ 55 | assert warmup_epochs < epochs, f"{warmup_epochs=} must be < {epochs=}" 56 | self._effective_steps = int(epochs * steps_per_epoch) 57 | self.schedule = generate_cosine_schedule( 58 | base_value=base_value, 59 | final_value=final_value, 60 | epochs=epochs, 61 | steps_per_epoch=steps_per_epoch, 62 | warmup_epochs=warmup_epochs, 63 | warmup_start_value=warmup_start_value, 64 | ) 65 | assert self.schedule.shape == (self._effective_steps,) 66 | self._final_value = final_value 67 | self._steps_tensor = torch.tensor(0, dtype=torch.long) # for register buffer 68 | 69 | def register_buffer(self, module: torch.nn.Module, name="cosine_steps"): 70 | module.register_buffer(name, self._steps_tensor, persistent=True) 71 | 72 | def __call__(self, step): 73 | self._steps_tensor.copy_(torch.tensor(step)) 74 | if step >= self._effective_steps: 75 | val = self._final_value 76 | else: 77 | val = self.schedule[step] 78 | return val 79 | -------------------------------------------------------------------------------- /transic/learn/optimizer_group.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Union, List, Tuple 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from transic.utils.misc_utils import match_patterns 7 | 8 | FilterType = Union[ 9 | Callable[[str, torch.Tensor], bool], List[str], Tuple[str], str, None 10 | ] 11 | 12 | 13 | def default_optimizer_groups( 14 | model: nn.Module, 15 | weight_decay: float, 16 | lr_scale: float = 1.0, 17 | no_decay_filter: FilterType = None, 18 | exclude_filter: FilterType = None, 19 | ): 20 | """ 21 | lr_scale is only effective when using with enlight.learn.lr_schedule.LambdaLRWithScale 22 | 23 | Returns: 24 | [{'lr_scale': 1.0, 'weight_decay': weight_decay, 'params': decay_group}, 25 | {'lr_scale': 1.0, 'weight_decay': 0.0, 'params': no_decay_group}], 26 | list of all param_ids processed 27 | """ 28 | no_decay_filter = _transform_filter(no_decay_filter) 29 | exclude_filter = _transform_filter(exclude_filter) 30 | decay_group = [] 31 | no_decay_group = [] 32 | all_params_id = [] 33 | for n, p in model.named_parameters(): 34 | all_params_id.append(id(p)) 35 | if not p.requires_grad or exclude_filter(n, p): 36 | continue 37 | 38 | # no decay: all 1D parameters and model specific ones 39 | if p.ndim == 1 or no_decay_filter(n, p): 40 | no_decay_group.append(p) 41 | else: 42 | decay_group.append(p) 43 | return [ 44 | {"weight_decay": weight_decay, "params": decay_group, "lr_scale": lr_scale}, 45 | {"weight_decay": 0.0, "params": no_decay_group, "lr_scale": lr_scale}, 46 | ], all_params_id 47 | 48 | 49 | def _transform_filter(filter: FilterType): 50 | """ 51 | Filter can be: 52 | - None: always returns False 53 | - function(name, p) -> True to activate, False to deactivate 54 | - list of strings to match, can have wildcard 55 | """ 56 | if filter is None: 57 | return lambda name, p: False 58 | elif callable(filter): 59 | return filter 60 | elif isinstance(filter, (str, list, tuple)): 61 | if isinstance(filter, str): 62 | filter = [filter] 63 | return lambda name, p: match_patterns(name, include=filter) 64 | else: 65 | raise ValueError(f"Invalid filter: {filter}") 66 | -------------------------------------------------------------------------------- /transic/learn/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BasePolicy 2 | from .distributions import GMMHead, CategoricalNet 3 | 
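# Minimal usage sketch (illustrative only; `MyPolicy` and the dimensions are
# hypothetical and not part of this package):
#
#   from transic.learn.policy import BasePolicy, GMMHead
#
#   class MyPolicy(BasePolicy):
#       def __init__(self):
#           super().__init__()
#           self.head = GMMHead(64, n_modes=5, action_dim=7,
#                               hidden_dim=128, hidden_depth=2)
#
#       def forward(self, obs):
#           return self.head(obs)  # returns an action distribution
#
#       def act(self, obs, deterministic=True):
#           dist = self.forward(obs)
#           return dist.mode() if deterministic else dist.sample()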
-------------------------------------------------------------------------------- /transic/learn/policy/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from pytorch_lightning import LightningModule 4 | 5 | 6 | class BasePolicy(ABC, LightningModule): 7 | is_sequence_policy: bool = False 8 | 9 | @abstractmethod 10 | def forward(self, *args, **kwargs): 11 | """ 12 | Forward the NN. 13 | """ 14 | pass 15 | 16 | @abstractmethod 17 | def act(self, *args, **kwargs): 18 | """ 19 | Given obs, return action. 20 | """ 21 | pass 22 | -------------------------------------------------------------------------------- /transic/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/nn/__init__.py -------------------------------------------------------------------------------- /transic/nn/features/__init__.py: -------------------------------------------------------------------------------- 1 | from .embedding import Embedding 2 | from .pointcloud import * 3 | from .fusion import SimpleFeatureFusion 4 | from .identity import Identity 5 | -------------------------------------------------------------------------------- /transic/nn/features/embedding.py: -------------------------------------------------------------------------------- 1 | from torch.nn import Embedding as _Embedding 2 | 3 | 4 | class Embedding(_Embedding): 5 | def __init__(self, *args, **kwargs): 6 | super().__init__(*args, **kwargs) 7 | self.output_dim = self.embedding_dim 8 | -------------------------------------------------------------------------------- /transic/nn/features/fusion.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from transic.nn.mlp import build_mlp 7 | from transic.learn.optimizer_group import default_optimizer_groups 8 | 9 | 10 | class SimpleFeatureFusion(nn.Module): 11 | def __init__( 12 | self, 13 | extractors: dict[str, nn.Module], 14 | hidden_depth: int, 15 | hidden_dim: int, 16 | output_dim: int, 17 | activation, 18 | add_input_activation: bool, 19 | add_output_activation: bool, 20 | ): 21 | super().__init__() 22 | self._extractors = nn.ModuleDict(extractors) 23 | extractors_output_dim = sum(e.output_dim for e in extractors.values()) 24 | self.output_dim = output_dim 25 | self._head = build_mlp( 26 | input_dim=extractors_output_dim, 27 | hidden_dim=hidden_dim, 28 | output_dim=output_dim, 29 | hidden_depth=hidden_depth, 30 | activation=activation, 31 | weight_init="orthogonal", 32 | bias_init="zeros", 33 | norm_type=None, 34 | add_input_activation=add_input_activation, 35 | add_input_norm=False, 36 | add_output_activation=add_output_activation, 37 | add_output_norm=False, 38 | ) 39 | 40 | self._obs_groups = None 41 | self._obs_key_checked = False 42 | 43 | def _check_obs_key_match(self, obs: dict, strict: bool = False): 44 | if strict: 45 | assert set(self._extractors.keys()) == set(obs.keys()) 46 | elif set(self._extractors.keys()) != set(obs.keys()): 47 | print( 48 | f"[warning] obs key mismatch: {set(self._extractors.keys())} != {set(obs.keys())}" 49 | ) 50 | 51 | def forward(self, x): 52 | x = self._group_obs(x) 53 | if not self._obs_key_checked: 54 | self._check_obs_key_match(x, strict=False) 55 | self._obs_key_checked = True 56 | x = {k: v.forward(x[k]) 
for k, v in self._extractors.items()} 57 | x = torch.cat([x[k] for k in sorted(x.keys())], dim=-1) 58 | x = self._head(x) 59 | return x 60 | 61 | def _group_obs(self, obs): 62 | obs_keys = obs.keys() 63 | if self._obs_groups is None: 64 | # group by / 65 | obs_groups = {k.split("/")[0] for k in obs_keys} 66 | self._obs_groups = sorted(list(obs_groups)) 67 | obs_rtn = {} 68 | for g in self._obs_groups: 69 | is_subgroup = any(k.startswith(f"{g}/") for k in obs_keys) 70 | if is_subgroup: 71 | obs_rtn[g] = { 72 | k.split("/", 1)[1]: v 73 | for k, v in obs.items() 74 | if k.startswith(f"{g}/") 75 | } 76 | else: 77 | obs_rtn[g] = obs[g] 78 | return obs_rtn 79 | 80 | def get_optimizer_groups(self, weight_decay, lr_layer_decay, lr_scale=1.0): 81 | extractors_pgs, extractor_pids = [], [] 82 | for extractor in self._extractors.values(): 83 | pg, pid = extractor.get_optimizer_groups( 84 | weight_decay=weight_decay, 85 | lr_layer_decay=lr_layer_decay, 86 | lr_scale=lr_scale, 87 | ) 88 | extractors_pgs.extend(pg) 89 | extractor_pids.extend(pid) 90 | head_pg, head_pid = default_optimizer_groups( 91 | self, 92 | weight_decay=weight_decay, 93 | lr_scale=lr_scale, 94 | exclude_filter=lambda name, p: id(p) in extractor_pids, 95 | ) 96 | return extractors_pgs + head_pg, extractor_pids + head_pid 97 | -------------------------------------------------------------------------------- /transic/nn/features/identity.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Identity(nn.Module): 5 | def __init__( 6 | self, 7 | input_dim: int, 8 | ): 9 | super().__init__() 10 | self._output_dim = input_dim 11 | 12 | @property 13 | def output_dim(self): 14 | return self._output_dim 15 | 16 | def forward(self, x): 17 | return x 18 | 19 | def get_optimizer_groups(self, *args, **kwargs): 20 | return [], [] 21 | -------------------------------------------------------------------------------- /transic/nn/features/pointcloud/__init__.py: -------------------------------------------------------------------------------- 1 | from .pointnet import PointNet 2 | from .set_transformer import SetXFPCDEncoder 3 | -------------------------------------------------------------------------------- /transic/nn/features/pointcloud/pointnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from transic.nn.mlp import build_mlp 5 | from transic.learn.optimizer_group import default_optimizer_groups 6 | 7 | 8 | class _PointNetSimplified(nn.Module): 9 | def __init__( 10 | self, 11 | *, 12 | point_channels: int = 3, 13 | output_dim: int, 14 | hidden_dim: int, 15 | hidden_depth: int, 16 | activation: str = "gelu", 17 | ): 18 | super().__init__() 19 | self._mlp = build_mlp( 20 | input_dim=point_channels, 21 | hidden_dim=hidden_dim, 22 | output_dim=output_dim, 23 | hidden_depth=hidden_depth, 24 | activation=activation, 25 | ) 26 | self.output_dim = output_dim 27 | 28 | def forward(self, x): 29 | """ 30 | x: (..., points, point_channels) 31 | """ 32 | x = self._mlp(x) # (..., points, output_dim) 33 | x = torch.max(x, dim=-2)[0] # (..., output_dim) 34 | return x 35 | 36 | 37 | class PointNet(nn.Module): 38 | def __init__( 39 | self, 40 | *, 41 | n_coordinates: int = 3, 42 | add_ee_embd: bool = False, 43 | ee_embd_dim: int = 128, 44 | output_dim: int = 512, 45 | hidden_dim: int = 512, 46 | hidden_depth: int = 2, 47 | activation: str = "gelu", 48 | subtract_mean: bool = False, 49 | ): 50 | 
super().__init__()
51 |         pn_in_channels = n_coordinates
52 |         if add_ee_embd:
53 |             pn_in_channels += ee_embd_dim
54 |         if subtract_mean:
55 |             pn_in_channels += n_coordinates
56 |         self.pointnet = _PointNetSimplified(
57 |             point_channels=pn_in_channels,
58 |             output_dim=output_dim,
59 |             hidden_dim=hidden_dim,
60 |             hidden_depth=hidden_depth,
61 |             activation=activation,
62 |         )
63 |         self.ee_embd_layer = None
64 |         if add_ee_embd:
65 |             self.ee_embd_layer = nn.Embedding(2, embedding_dim=ee_embd_dim)
66 |         self.add_ee_embd = add_ee_embd
67 |         self.subtract_mean = subtract_mean
68 |         self.output_dim = self.pointnet.output_dim
69 |
70 |     def forward(self, x):
71 |         """
72 |         x["coordinate"]: (..., points, coordinates)
73 |         """
74 |         point = x["coordinate"]
75 |         ee_mask = x.get("ee_mask", None)
76 |         if self.subtract_mean:
77 |             mean = torch.mean(point, dim=-2, keepdim=True)  # (..., 1, coordinates)
78 |             mean = torch.broadcast_to(mean, point.shape)  # (..., points, coordinates)
79 |             point = point - mean
80 |             point = torch.cat([point, mean], dim=-1)  # (..., points, 2 * coordinates)
81 |         if self.add_ee_embd:
82 |             ee_mask = ee_mask.to(dtype=torch.long)  # (..., points); .to() keeps the device, unlike torch.tensor()
83 |             ee_embd = self.ee_embd_layer(ee_mask)  # (..., points, ee_embd_dim)
84 |             point = torch.concat(
85 |                 [point, ee_embd], dim=-1
86 |             )  # (..., points, coordinates + ee_embd_dim)
87 |         return self.pointnet(point)  # pass the processed tensor, not the input dict
88 |
89 |     def get_optimizer_groups(self, weight_decay, lr_layer_decay, lr_scale=1.0):
90 |         pg, pids = default_optimizer_groups(
91 |             self,
92 |             weight_decay=weight_decay,
93 |             lr_scale=lr_scale,
94 |             no_decay_filter=["ee_embd_layer.*"],
95 |         )
96 |         return pg, pids
97 |
-------------------------------------------------------------------------------- /transic/nn/features/pointcloud/set_transformer/__init__.py: --------------------------------------------------------------------------------
1 | from .set_xf_pcd_encoder import SetXFPCDEncoder
2 |
-------------------------------------------------------------------------------- /transic/nn/features/pointcloud/set_transformer/set_transformer.py: --------------------------------------------------------------------------------
1 | """
2 | https://github.com/juho-lee/set_transformer
3 | Paper: Set Transformer: A Framework for Attention-based Permutation-Invariant Neural Networks
4 | """
5 | from __future__ import annotations
6 | from typing import Literal
7 |
8 | import torch
9 | import torch.nn as nn
10 | import math
11 | from einops import rearrange
12 |
13 |
14 | __all__ = [
15 |     "SetAttention",
16 |     "SelfSetAttention",
17 |     "InducedSetAttention",
18 |     "PoolingSetAttention",
19 |     "IdentityKeyValuePoolingAttention",
20 | ]
21 |
22 |
23 | class SetAttention(nn.Module):
24 |     """
25 |     "MAB" in the original paper
26 |     """
27 |
28 |     def __init__(
29 |         self,
30 |         dim_Q,
31 |         dim_K,
32 |         dim_V,
33 |         num_heads,
34 |         layer_norm=False,
35 |     ):
36 |         """
37 |         Args:
38 |             dim_Q, dim_K, dim_V: query, key/value, and output dims; dim_V must be divisible by num_heads.
39 |             layer_norm: if True, apply LayerNorm after the attention and feed-forward residuals. For pooling that keeps K untransformed, see IdentityKeyValuePoolingAttention below.
40 | See CoCa paper: https://arxiv.org/abs/2205.01917 41 | """ 42 | super().__init__() 43 | self.dim_V = dim_V 44 | self.num_heads = num_heads 45 | assert self.dim_V % self.num_heads == 0 46 | self.fc_q = nn.Linear(dim_Q, dim_V) 47 | self.fc_k = nn.Linear(dim_K, dim_V) 48 | self.fc_v = nn.Linear(dim_K, dim_V) 49 | if layer_norm: 50 | self.ln0 = nn.LayerNorm(dim_V) 51 | self.ln1 = nn.LayerNorm(dim_V) 52 | else: 53 | self.ln0 = nn.Identity() 54 | self.ln1 = nn.Identity() 55 | self.fc_o = nn.Linear(dim_V, dim_V) 56 | self.act = nn.ReLU(inplace=True) 57 | 58 | def forward(self, Q, K, mask=None): 59 | """ 60 | mask: if not none, should be (B, L_src, L_trg) 61 | """ 62 | if mask is not None: 63 | assert mask.shape[0] == Q.shape[0] 64 | assert mask.shape[1] == Q.shape[1] 65 | assert mask.shape[2] == K.shape[1] 66 | # check valid mask 67 | assert mask.dtype == torch.bool 68 | assert torch.all( 69 | mask.sum(dim=2) > 0 70 | ), "each source token should attend to at least one target token" 71 | # repeat mask num_heads times 72 | mask = torch.cat([mask] * self.num_heads, 0) 73 | Q = self.fc_q(Q) 74 | K, V = self.fc_k(K), self.fc_v(K) 75 | 76 | dim_split = self.dim_V // self.num_heads 77 | Q_ = torch.cat(Q.split(dim_split, 2), 0) 78 | K_ = torch.cat(K.split(dim_split, 2), 0) 79 | V_ = torch.cat(V.split(dim_split, 2), 0) 80 | 81 | A = Q_.bmm(K_.transpose(1, 2)) / math.sqrt(self.dim_V) 82 | if mask is not None: 83 | A.masked_fill_(mask == 0, -float("inf")) 84 | A = torch.softmax(A, 2) 85 | O = torch.cat((Q_ + A.bmm(V_)).split(Q.size(0), 0), 2) 86 | O = self.ln0(O) 87 | O = O + self.act(self.fc_o(O)) 88 | O = self.ln1(O) 89 | return O 90 | 91 | 92 | class SelfSetAttention(SetAttention): 93 | """ 94 | "SAB" in the original paper 95 | """ 96 | 97 | def forward(self, X): 98 | return super().forward(X, X) 99 | 100 | 101 | class InducedSetAttention(nn.Module): 102 | """ 103 | "ISAB" in the original paper 104 | """ 105 | 106 | def __init__( 107 | self, 108 | dim_in, 109 | dim_out, 110 | num_heads, 111 | num_queries, 112 | layer_norm=False, 113 | ): 114 | super().__init__() 115 | self.I = nn.Parameter(torch.Tensor(1, num_queries, dim_out)) 116 | nn.init.xavier_uniform_(self.I) 117 | self.mab0 = SetAttention( 118 | dim_Q=dim_out, 119 | dim_K=dim_in, 120 | dim_V=dim_out, 121 | num_heads=num_heads, 122 | layer_norm=layer_norm, 123 | ) 124 | self.mab1 = SetAttention( 125 | dim_Q=dim_in, 126 | dim_K=dim_out, 127 | dim_V=dim_out, 128 | num_heads=num_heads, 129 | layer_norm=layer_norm, 130 | ) 131 | 132 | def forward(self, X): 133 | H = self.mab0(self.I.repeat(X.size(0), 1, 1), X) 134 | return self.mab1(X, H) 135 | 136 | 137 | class PoolingSetAttention(nn.Module): 138 | """ 139 | "PMA" in the original paper 140 | """ 141 | 142 | def __init__( 143 | self, 144 | dim, 145 | num_heads, 146 | num_queries, 147 | pool_type: Literal["avg", "concat", "none", None] = None, 148 | layer_norm=False, 149 | ): 150 | """ 151 | Args: 152 | num_queries: pools the original set into `num_queries` features 153 | pool_type: 'avg', 'concat', or None 154 | - 'avg': average pooling, returns [B, dim] 155 | - 'max': max pooling, returns [B, dim] 156 | - 'concat': concatenate the pooled features, returns [B, num_queries*dim] 157 | - None: don't pool and returns [B, num_queries, dim] 158 | """ 159 | super().__init__() 160 | assert pool_type in ["avg", "concat", "none", "max", None] 161 | self._pool_type = pool_type 162 | self.S = nn.Parameter(torch.Tensor(1, num_queries, dim)) 163 | nn.init.xavier_uniform_(self.S) 164 | self.mab = SetAttention( 
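            # PMA is just MAB(S, X): the learned seed S supplies num_queries
            # query vectors, the input set X supplies keys and values, so the
            # set is pooled into num_queries outputs of size dim.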
165 | dim, 166 | dim, 167 | dim, 168 | num_heads=num_heads, 169 | layer_norm=layer_norm, 170 | ) 171 | 172 | def forward(self, X, mask=None): 173 | O = self.mab(self.S.repeat(X.size(0), 1, 1), X, mask) 174 | if self._pool_type == "avg": 175 | return O.mean(dim=1) 176 | elif self._pool_type == "max": 177 | return O.max(dim=1)[0] 178 | elif self._pool_type == "concat": 179 | return rearrange(O, "b q d -> b (q d)") 180 | elif self._pool_type in ["none", None]: 181 | return O 182 | else: 183 | raise ValueError(f"Unknown pool_type: {self._pool_type}") 184 | 185 | 186 | class IdentityKeyValuePoolingAttention(nn.Module): 187 | """ 188 | The key/value are identity functions as the original features, and only 189 | the query (external inducing point) is learned. 190 | See CoCa paper: https://arxiv.org/abs/2205.01917 191 | """ 192 | 193 | def __init__(self, dim, num_heads, num_queries=1): 194 | """ 195 | Args: 196 | """ 197 | super().__init__() 198 | self.Q = nn.Parameter(torch.Tensor(1, num_queries, dim)) 199 | nn.init.xavier_uniform_(self.Q) 200 | self.dim = dim 201 | self.num_heads = num_heads 202 | assert self.dim % self.num_heads == 0 203 | self._extra_repr = dict(dim=dim, num_heads=num_heads, num_queries=num_queries) 204 | 205 | def forward(self, V): 206 | # V: [B, L, D], L is sequence length 207 | B, L, D = V.size() 208 | assert D == self.dim 209 | batch_size = V.size(0) 210 | Q = self.Q.repeat(batch_size, 1, 1) 211 | K = V # K and V are both identity functions from the original features 212 | 213 | dim_split = self.dim // self.num_heads 214 | Q_ = torch.cat(Q.split(dim_split, 2), 0) 215 | K_ = torch.cat(K.split(dim_split, 2), 0) 216 | V_ = torch.cat(V.split(dim_split, 2), 0) 217 | 218 | A = torch.softmax(Q_.bmm(K_.transpose(1, 2)) / math.sqrt(self.dim), 2) 219 | O = A.bmm(V_) 220 | O = rearrange(O, "(nh b) q d -> b q (nh d)", b=batch_size) 221 | return O.mean(1) # average over number of query vector features 222 | 223 | def extra_repr(self) -> str: 224 | return ", ".join(f"{k}={v}" for k, v in self._extra_repr.items()) 225 | -------------------------------------------------------------------------------- /transic/nn/features/pointcloud/set_transformer/set_xf_pcd_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from transic.nn.features.pointcloud.set_transformer.set_transformer import ( 5 | PoolingSetAttention, 6 | ) 7 | 8 | 9 | class SetXFPCDEncoder(nn.Module): 10 | def __init__( 11 | self, 12 | *, 13 | n_coordinates: int = 3, 14 | add_ee_embd: bool = False, 15 | ee_embd_dim: int = 128, 16 | hidden_dim: int = 512, 17 | subtract_mean: bool = False, 18 | set_xf_num_heads: int = 8, 19 | set_xf_num_queries: int = 8, 20 | set_xf_pool_type, 21 | set_xf_layer_norm: bool = False, 22 | ): 23 | super().__init__() 24 | pn_in_channels = n_coordinates 25 | if add_ee_embd: 26 | pn_in_channels += ee_embd_dim 27 | if subtract_mean: 28 | pn_in_channels += n_coordinates 29 | 30 | self.linear = nn.Linear(pn_in_channels, hidden_dim) 31 | self.num_queries = set_xf_num_queries 32 | self.set_xf = PoolingSetAttention( 33 | dim=hidden_dim, 34 | num_heads=set_xf_num_heads, 35 | num_queries=set_xf_num_queries, 36 | pool_type=set_xf_pool_type, 37 | layer_norm=set_xf_layer_norm, 38 | ) 39 | self.ee_embd_layer = None 40 | if add_ee_embd: 41 | self.ee_embd_layer = nn.Embedding(2, embedding_dim=ee_embd_dim) 42 | self.add_ee_embd = add_ee_embd 43 | self.subtract_mean = subtract_mean 44 | if set_xf_pool_type == "concat": 45 | 
self.output_dim = hidden_dim * set_xf_num_queries
46 |         else:
47 |             self.output_dim = hidden_dim
48 |
49 |     def forward(self, x):
50 |         """
51 |         x["coordinate"]: (..., points, coordinates)
52 |         x["ee_mask"]: (..., points) if present
53 |         x["pad_mask"]: (..., points) if present (for variable length point clouds)
54 |         """
55 |         point = x["coordinate"]
56 |         leading_dims = point.shape[:-2]
57 |         point = point.reshape(-1, *point.shape[-2:])
58 |         ee_mask = x.get("ee_mask", None)
59 |         if ee_mask is not None:
60 |             ee_mask = ee_mask.reshape(-1, *ee_mask.shape[-1:])
61 |         pad_mask = x.get("pad_mask", None)
62 |         if pad_mask is not None:
63 |             pad_mask = pad_mask.reshape(-1, *pad_mask.shape[-1:])
64 |             pad_mask = pad_mask.to(dtype=torch.bool)
65 |             pad_mask = pad_mask.unsqueeze(1)  # (..., 1, points)
66 |             pad_mask = pad_mask.repeat(
67 |                 1, self.num_queries, 1
68 |             )  # (..., num_queries, points)
69 |         if self.subtract_mean:
70 |             mean = torch.mean(point, dim=-2, keepdim=True)  # (..., 1, coordinates)
71 |             mean = torch.broadcast_to(mean, point.shape)  # (..., points, coordinates)
72 |             point = point - mean
73 |             point = torch.cat([point, mean], dim=-1)  # (..., points, 2 * coordinates)
74 |         if self.add_ee_embd:
75 |             ee_mask = ee_mask.to(dtype=torch.long)  # (..., points)
76 |             ee_embd = self.ee_embd_layer(ee_mask)  # (..., points, ee_embd_dim)
77 |             point = torch.concat(
78 |                 [point, ee_embd], dim=-1
79 |             )  # (..., points, coordinates + ee_embd_dim)
80 |         point = self.linear(point)  # (..., points, hidden_dim)
81 |         output = self.set_xf(point, mask=pad_mask)  # (..., self.output_dim)
82 |         # recover leading dimensions; shape[1:] also covers the unpooled (num_queries, dim) output
83 |         output = output.reshape(*leading_dims, *output.shape[1:])
84 |         return output
85 |
-------------------------------------------------------------------------------- /transic/nn/mlp.py: --------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import torch.nn as nn
4 | from typing import Callable, Literal
5 |
6 |
7 | def get_activation(activation: str | Callable | None) -> Callable:
8 |     if not activation:
9 |         return nn.Identity
10 |     elif callable(activation):
11 |         return activation
12 |     ACT_LAYER = {
13 |         "tanh": nn.Tanh,
14 |         "relu": lambda: nn.ReLU(inplace=True),
15 |         "leaky_relu": lambda: nn.LeakyReLU(inplace=True),
16 |         "swish": lambda: nn.SiLU(inplace=True),  # SiLU is alias for Swish
17 |         "sigmoid": nn.Sigmoid,
18 |         "elu": lambda: nn.ELU(inplace=True),
19 |         "gelu": nn.GELU,
20 |     }
21 |     activation = activation.lower()
22 |     assert activation in ACT_LAYER, f"Supported activations: {ACT_LAYER.keys()}"
23 |     return ACT_LAYER[activation]
24 |
25 |
26 | def get_initializer(method: str | Callable, activation: str) -> Callable:
27 |     if isinstance(method, str):
28 |         assert hasattr(
29 |             nn.init, f"{method}_"
30 |         ), f"Initializer nn.init.{method}_ does not exist"
31 |         if method == "orthogonal":
32 |             try:
33 |                 gain = nn.init.calculate_gain(activation)
34 |             except ValueError:
35 |                 gain = 1.0
36 |             return lambda x: nn.init.orthogonal_(x, gain=gain)
37 |         else:
38 |             return getattr(nn.init, f"{method}_")
39 |     else:
40 |         assert callable(method)
41 |         return method
42 |
43 |
44 | def build_mlp(
45 |     input_dim,
46 |     *,
47 |     hidden_dim: int,
48 |     output_dim: int,
49 |     hidden_depth: int = None,
50 |     num_layers: int = None,
51 |     activation: str | Callable = "relu",
52 |     weight_init: str | Callable = "orthogonal",
53 |     bias_init="zeros",
54 |     norm_type: Literal["batchnorm", "layernorm"] | None = None,
55 |     add_input_activation: bool | str | Callable = False,
56 |
add_input_norm: bool = False, 57 | add_output_activation: bool | str | Callable = False, 58 | add_output_norm: bool = False, 59 | ) -> nn.Sequential: 60 | """ 61 | In other popular RL implementations, tanh is typically used with orthogonal 62 | initialization, which may perform better than ReLU. 63 | 64 | Args: 65 | norm_type: None, "batchnorm", "layernorm", applied to intermediate layers 66 | add_input_activation: whether to add a nonlinearity to the input _before_ 67 | the MLP computation. This is useful for processing a feature from a preceding 68 | image encoder, for example. Image encoder typically has a linear layer 69 | at the end, and we don't want the MLP to immediately stack another linear 70 | layer on the input features. 71 | - True to add the same activation as the rest of the MLP 72 | - str to add an activation of a different type. 73 | add_input_norm: see `add_input_activation`, whether to add a normalization layer 74 | to the input _before_ the MLP computation. 75 | values: True to add the `norm_type` to the input 76 | add_output_activation: whether to add a nonlinearity to the output _after_ the 77 | MLP computation. 78 | - True to add the same activation as the rest of the MLP 79 | - str to add an activation of a different type. 80 | add_output_norm: see `add_output_activation`, whether to add a normalization layer 81 | _after_ the MLP computation. 82 | values: True to add the `norm_type` to the input 83 | """ 84 | assert (hidden_depth is None) != (num_layers is None), ( 85 | "Either hidden_depth or num_layers must be specified, but not both. " 86 | "num_layers is defined as hidden_depth+1" 87 | ) 88 | if hidden_depth is not None: 89 | assert hidden_depth >= 0 90 | if num_layers is not None: 91 | assert num_layers >= 1 92 | act_layer = get_activation(activation) 93 | 94 | weight_init = get_initializer(weight_init, activation) 95 | bias_init = get_initializer(bias_init, activation) 96 | 97 | if norm_type is not None: 98 | norm_type = norm_type.lower() 99 | 100 | if not norm_type: 101 | norm_type = nn.Identity 102 | elif norm_type == "batchnorm": 103 | norm_type = nn.BatchNorm1d 104 | elif norm_type == "layernorm": 105 | norm_type = nn.LayerNorm 106 | else: 107 | raise ValueError(f"Unsupported norm layer: {norm_type}") 108 | 109 | hidden_depth = num_layers - 1 if hidden_depth is None else hidden_depth 110 | if hidden_depth == 0: 111 | mods = [nn.Linear(input_dim, output_dim)] 112 | else: 113 | mods = [nn.Linear(input_dim, hidden_dim), norm_type(hidden_dim), act_layer()] 114 | for i in range(hidden_depth - 1): 115 | mods += [ 116 | nn.Linear(hidden_dim, hidden_dim), 117 | norm_type(hidden_dim), 118 | act_layer(), 119 | ] 120 | mods.append(nn.Linear(hidden_dim, output_dim)) 121 | 122 | if add_input_norm: 123 | mods = [norm_type(input_dim)] + mods 124 | if add_input_activation: 125 | if add_input_activation is not True: 126 | act_layer = get_activation(add_input_activation) 127 | mods = [act_layer()] + mods 128 | if add_output_norm: 129 | mods.append(norm_type(output_dim)) 130 | if add_output_activation: 131 | if add_output_activation is not True: 132 | act_layer = get_activation(add_output_activation) 133 | mods.append(act_layer()) 134 | 135 | for mod in mods: 136 | if isinstance(mod, nn.Linear): 137 | weight_init(mod.weight) 138 | bias_init(mod.bias) 139 | 140 | return nn.Sequential(*mods) 141 | 142 | 143 | class MLP(nn.Module): 144 | def __init__( 145 | self, 146 | input_dim, 147 | *, 148 | hidden_dim: int, 149 | output_dim: int, 150 | hidden_depth: int = None, 151 | 
num_layers: int = None, 152 | activation: str | Callable = "relu", 153 | weight_init: str | Callable = "orthogonal", 154 | bias_init="zeros", 155 | norm_type: Literal["batchnorm", "layernorm"] | None = None, 156 | add_input_activation: bool | str | Callable = False, 157 | add_input_norm: bool = False, 158 | add_output_activation: bool | str | Callable = False, 159 | add_output_norm: bool = False, 160 | ): 161 | super().__init__() 162 | # delegate to build_mlp by keywords 163 | self.layers = build_mlp( 164 | input_dim, 165 | hidden_dim=hidden_dim, 166 | output_dim=output_dim, 167 | hidden_depth=hidden_depth, 168 | num_layers=num_layers, 169 | activation=activation, 170 | weight_init=weight_init, 171 | bias_init=bias_init, 172 | norm_type=norm_type, 173 | add_input_activation=add_input_activation, 174 | add_input_norm=add_input_norm, 175 | add_output_activation=add_output_activation, 176 | add_output_norm=add_output_norm, 177 | ) 178 | # add attributes to the class 179 | self.input_dim = input_dim 180 | self.output_dim = output_dim 181 | self.hidden_depth = hidden_depth 182 | self.activation = activation 183 | self.weight_init = weight_init 184 | self.bias_init = bias_init 185 | self.norm_type = norm_type 186 | if add_input_activation is True: 187 | self.input_activation = activation 188 | else: 189 | self.input_activation = add_input_activation 190 | if add_input_norm is True: 191 | self.input_norm_type = norm_type 192 | else: 193 | self.input_norm_type = None 194 | # do the same for output activation and norm 195 | if add_output_activation is True: 196 | self.output_activation = activation 197 | else: 198 | self.output_activation = add_output_activation 199 | if add_output_norm is True: 200 | self.output_norm_type = norm_type 201 | else: 202 | self.output_norm_type = None 203 | 204 | def forward(self, x): 205 | return self.layers(x) 206 | -------------------------------------------------------------------------------- /transic/real_world/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/real_world/__init__.py -------------------------------------------------------------------------------- /transic/residual/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/residual/__init__.py -------------------------------------------------------------------------------- /transic/residual/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_module import ResidualDataModule 2 | -------------------------------------------------------------------------------- /transic/residual/data/collate.py: -------------------------------------------------------------------------------- 1 | from math import ceil 2 | from copy import deepcopy 3 | 4 | import numpy as np 5 | 6 | from transic.utils.array import ( 7 | get_batch_size, 8 | any_slice, 9 | any_stack, 10 | any_concat, 11 | any_ones_like, 12 | any_to_torch_tensor, 13 | nested_np_split, 14 | ) 15 | 16 | 17 | def collate_fn( 18 | sample_list, 19 | ctx_len: int = 5, 20 | ): 21 | """ 22 | sample_list: List of Dict[str, np.ndarray] 23 | """ 24 | L_max = max(get_batch_size(sample) for sample in sample_list) 25 | N_chunks = ceil(L_max / ctx_len) 26 | L_pad_max = N_chunks * ctx_len 27 | 28 | sample_structure = 
deepcopy(any_slice(sample_list[0], np.s_[0:1])) 29 | # pad to max length in this batch 30 | processed_main_data = any_stack( 31 | [ 32 | any_concat( 33 | [ 34 | sample, 35 | ] 36 | + [any_ones_like(sample_structure)] 37 | * (L_pad_max - get_batch_size(sample)), 38 | dim=0, 39 | ) 40 | for sample in sample_list 41 | ], 42 | dim=0, 43 | ) # dict of (B, L_pad_max, ...) 44 | # construct mask 45 | mask = any_stack( 46 | [ 47 | any_concat( 48 | [ 49 | np.ones((get_batch_size(sample),), dtype=bool), 50 | np.zeros((L_pad_max - get_batch_size(sample),), dtype=bool), 51 | ] 52 | ) 53 | for sample in sample_list 54 | ], 55 | dim=0, 56 | ) # (B, L_pad_max) 57 | 58 | # split into chunks 59 | processed_main_data = { 60 | k: any_stack(v, dim=0) 61 | for k, v in nested_np_split(processed_main_data, N_chunks, axis=1).items() 62 | } # dict of (N_chunks, B, ctx_len, ...) 63 | mask = any_stack(np.split(mask, N_chunks, axis=1), dim=0) # (N_chunks, B, ctx_len) 64 | processed_main_data["pad_mask"] = mask 65 | 66 | # convert to tensor 67 | processed_main_data = { 68 | k: any_to_torch_tensor(v) for k, v in processed_main_data.items() 69 | } 70 | return processed_main_data 71 | -------------------------------------------------------------------------------- /transic/residual/data/data_module.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Literal, List 2 | from functools import partial 3 | 4 | import torch 5 | from torch.utils.data import DataLoader 6 | from pytorch_lightning import LightningDataModule 7 | 8 | from transic.residual.data.dataset import ResidualDataset, ResidualSeqDataset 9 | from transic.residual.data.collate import collate_fn as _collate_fn 10 | 11 | 12 | class ResidualDataModule(LightningDataModule): 13 | def __init__( 14 | self, 15 | *, 16 | data_dir: str, 17 | variable_len_pcd_handle_strategy: Literal["pad", "truncate"], 18 | include_grasp_action: bool, 19 | gripper_close_width: float, 20 | gripper_open_width: float = 0.08, 21 | ctx_len: int = -1, # -1 means not using the SeqDataset at all 22 | seed: Optional[int] = None, 23 | batch_size: int, 24 | val_batch_size: Optional[int], 25 | train_portion: float = 0.9, 26 | dataloader_num_workers: int, 27 | ): 28 | super().__init__() 29 | 30 | self._data_dir = data_dir 31 | self._variable_len_pcd_handle_strategy = variable_len_pcd_handle_strategy 32 | self._include_grasp_action = include_grasp_action 33 | self._gripper_close_width = gripper_close_width 34 | self._gripper_open_width = gripper_open_width 35 | self._seed = seed 36 | self._bs = batch_size 37 | self._vbs = val_batch_size or batch_size 38 | self._train_portion = train_portion 39 | self._dataloader_num_workers = dataloader_num_workers 40 | 41 | self._ds_cls = ResidualSeqDataset if ctx_len != -1 else ResidualDataset 42 | self._collate_fn = ( 43 | partial(_collate_fn, ctx_len=ctx_len) if ctx_len != -1 else None 44 | ) 45 | 46 | self._train_dataset, self._val_dataset = None, None 47 | self._P_intervention = None 48 | 49 | @property 50 | def P_intervention(self): 51 | assert self._P_intervention is not None, "Call setup() first" 52 | return self._P_intervention 53 | 54 | def setup(self, stage: str) -> None: 55 | if stage == "fit" or stage is None: 56 | ds = self._ds_cls( 57 | data_dir=self._data_dir, 58 | variable_len_pcd_handle_strategy=self._variable_len_pcd_handle_strategy, 59 | include_grasp_action=self._include_grasp_action, 60 | gripper_close_width=self._gripper_close_width, 61 | 
gripper_open_width=self._gripper_open_width, 62 | seed=self._seed, 63 | ) 64 | self._P_intervention = ds.P_intervention 65 | self._train_dataset, self._val_dataset = _sequential_split_dataset( 66 | ds, split_portions=[self._train_portion, 1 - self._train_portion] 67 | ) 68 | 69 | def train_dataloader(self): 70 | return DataLoader( 71 | self._train_dataset, 72 | batch_size=self._bs, 73 | num_workers=min(self._bs, self._dataloader_num_workers), 74 | pin_memory=True, 75 | persistent_workers=True, 76 | collate_fn=self._collate_fn, 77 | ) 78 | 79 | def val_dataloader(self): 80 | return DataLoader( 81 | self._val_dataset, 82 | batch_size=self._vbs, 83 | num_workers=min(self._vbs, self._dataloader_num_workers), 84 | pin_memory=True, 85 | persistent_workers=True, 86 | collate_fn=self._collate_fn, 87 | ) 88 | 89 | 90 | def _accumulate(iterable, fn=lambda x, y: x + y): 91 | """ 92 | Return running totals 93 | # _accumulate([1,2,3,4,5]) --> 1 3 6 10 15 94 | # _accumulate([1,2,3,4,5], operator.mul) --> 1 2 6 24 120 95 | """ 96 | it = iter(iterable) 97 | try: 98 | total = next(it) 99 | except StopIteration: 100 | return 101 | yield total 102 | for element in it: 103 | total = fn(total, element) 104 | yield total 105 | 106 | 107 | def _sequential_split_dataset( 108 | dataset: torch.utils.data.Dataset, split_portions: List[float] 109 | ): 110 | """ 111 | Split a dataset into multiple datasets, each with a different portion of the 112 | original dataset. Uses torch.utils.data.Subset. 113 | """ 114 | assert len(split_portions) > 0, "split_portions must be a non-empty list" 115 | assert all(0.0 <= p <= 1.0 for p in split_portions), f"{split_portions=}" 116 | assert abs(sum(split_portions) - 1.0) < 1e-6, f"{sum(split_portions)=} != 1.0" 117 | L = len(dataset) 118 | assert L > 0, "dataset must be non-empty" 119 | # split the list with proportions 120 | lengths = [int(p * L) for p in split_portions] 121 | # make sure the last split fills the full dataset 122 | lengths[-1] += L - sum(lengths) 123 | indices = list(range(L)) 124 | 125 | return [ 126 | torch.utils.data.Subset(dataset, indices[offset - length : offset]) 127 | for offset, length in zip(_accumulate(lengths), lengths) 128 | ] 129 | -------------------------------------------------------------------------------- /transic/residual/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from .perceiver_residual_policy import PerceiverResidualPolicy 2 | from .pointnet_residual_policy import PointNetResidualPolicy 3 | -------------------------------------------------------------------------------- /transic/residual/policy/perceiver_residual_policy.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | from transic.learn.policy import GMMHead, CategoricalNet 6 | from transic.learn.policy import BasePolicy 7 | from transic.nn.features import ( 8 | SimpleFeatureFusion, 9 | SetXFPCDEncoder, 10 | Identity, 11 | Embedding, 12 | ) 13 | from transic.utils.torch_utils import load_state_dict, freeze_params 14 | 15 | 16 | class PerceiverResidualPolicy(BasePolicy): 17 | def __init__( 18 | self, 19 | *, 20 | point_channels: int = 3, 21 | subtract_point_mean: bool = False, 22 | add_ee_embd: bool = False, 23 | ee_embd_dim: int, 24 | set_xf_hidden_dim: int, 25 | set_xf_num_heads: int, 26 | set_xf_num_queries: int, 27 | set_xf_pool_type: str, 28 | set_xf_layer_norm: bool, 29 | prop_input_dim: int, 30 | robot_policy_output_dim: int, 31 | 
include_robot_policy_gripper_action_input: bool, 32 | robot_policy_gripper_action_embd_dim: int, 33 | feature_fusion_hidden_depth: int = 1, 34 | feature_fusion_hidden_dim: int = 256, 35 | feature_fusion_output_dim: int = 256, 36 | feature_fusion_activation: str = "relu", 37 | feature_fusion_add_input_activation: bool = False, 38 | feature_fusion_add_output_activation: bool = False, 39 | action_dim: int, 40 | action_net_gmm_n_modes: int = 5, 41 | action_net_hidden_dim: int, 42 | action_net_hidden_depth: int, 43 | action_net_activation: str = "relu", 44 | intervention_head_hidden_dim: int, 45 | intervention_head_hidden_depth: int, 46 | intervention_head_activation: str = "relu", 47 | deterministic_inference: bool = True, 48 | gmm_low_noise_eval: bool = True, 49 | update_intervention_head_only: bool = False, 50 | ckpt_path_if_update_intervention_head_only: str = None, 51 | ): 52 | super().__init__() 53 | 54 | extractors = { 55 | "pcd": SetXFPCDEncoder( 56 | n_coordinates=point_channels, 57 | add_ee_embd=add_ee_embd, 58 | ee_embd_dim=ee_embd_dim, 59 | hidden_dim=set_xf_hidden_dim, 60 | subtract_mean=subtract_point_mean, 61 | set_xf_num_heads=set_xf_num_heads, 62 | set_xf_num_queries=set_xf_num_queries, 63 | set_xf_pool_type=set_xf_pool_type, 64 | set_xf_layer_norm=set_xf_layer_norm, 65 | ), 66 | "proprioception": Identity(prop_input_dim), 67 | "robot_policy_action": Identity(robot_policy_output_dim), 68 | } 69 | if include_robot_policy_gripper_action_input: 70 | extractors["robot_policy_gripper_action"] = Embedding( 71 | num_embeddings=2, # open/close 72 | embedding_dim=robot_policy_gripper_action_embd_dim, 73 | ) 74 | 75 | self.feature_extractor = SimpleFeatureFusion( 76 | extractors=extractors, 77 | hidden_depth=feature_fusion_hidden_depth, 78 | hidden_dim=feature_fusion_hidden_dim, 79 | output_dim=feature_fusion_output_dim, 80 | activation=feature_fusion_activation, 81 | add_input_activation=feature_fusion_add_input_activation, 82 | add_output_activation=feature_fusion_add_output_activation, 83 | ) 84 | 85 | self.action_net = GMMHead( 86 | feature_fusion_output_dim, 87 | n_modes=action_net_gmm_n_modes, 88 | action_dim=action_dim, 89 | hidden_dim=action_net_hidden_dim, 90 | hidden_depth=action_net_hidden_depth, 91 | activation=action_net_activation, 92 | low_noise_eval=gmm_low_noise_eval, 93 | ) 94 | self.intervention_head = CategoricalNet( 95 | feature_fusion_output_dim, 96 | action_dim=2, # intervention or not 97 | hidden_dim=intervention_head_hidden_dim, 98 | hidden_depth=intervention_head_hidden_depth, 99 | activation=intervention_head_activation, 100 | ) 101 | if update_intervention_head_only: 102 | assert os.path.exists(ckpt_path_if_update_intervention_head_only) 103 | ckpt = torch.load( 104 | ckpt_path_if_update_intervention_head_only, map_location="cpu" 105 | ) 106 | 107 | feature_extractor_weighs = { 108 | k: v 109 | for k, v in ckpt["state_dict"].items() 110 | if k.startswith("residual_policy.feature_extractor") 111 | } 112 | load_state_dict( 113 | self.feature_extractor, 114 | feature_extractor_weighs, 115 | strip_prefix="residual_policy.feature_extractor.", 116 | strict=True, 117 | ) 118 | freeze_params(self.feature_extractor) 119 | 120 | action_net_weights = { 121 | k: v 122 | for k, v in ckpt["state_dict"].items() 123 | if k.startswith("residual_policy.action_net") 124 | } 125 | load_state_dict( 126 | self.action_net, 127 | action_net_weights, 128 | strip_prefix="residual_policy.action_net.", 129 | strict=True, 130 | ) 131 | freeze_params(self.action_net) 132 | 133 | 
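        # Note: with update_intervention_head_only=True, everything loaded
        # above is frozen, so only self.intervention_head (and nothing in the
        # feature extractor or GMM action head) receives gradients. This lets
        # the intervention classifier be retrained from a saved checkpoint
        # without disturbing the residual action predictions.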
self._deterministic_inference = deterministic_inference 134 | 135 | def forward(self, obs): 136 | feature = self.feature_extractor(obs) 137 | action_dist = self.action_net(feature) 138 | intervention_dist = self.intervention_head(feature) 139 | return action_dist, intervention_dist 140 | 141 | @torch.no_grad() 142 | def act(self, obs, deterministic=None): 143 | action_dist, intervention_dist = self.forward(obs) 144 | if deterministic is None: 145 | deterministic = self._deterministic_inference 146 | if deterministic: 147 | return action_dist.mode(), intervention_dist.mode() 148 | else: 149 | return action_dist.sample(), intervention_dist.sample() 150 | -------------------------------------------------------------------------------- /transic/residual/policy/pointnet_residual_policy.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | from transic.learn.policy import GMMHead, CategoricalNet 6 | from transic.learn.policy import BasePolicy 7 | from transic.nn.features import SimpleFeatureFusion, PointNet, Identity, Embedding 8 | from transic.utils.torch_utils import load_state_dict, freeze_params 9 | 10 | 11 | class PointNetResidualPolicy(BasePolicy): 12 | def __init__( 13 | self, 14 | *, 15 | point_channels: int = 3, 16 | subtract_point_mean: bool = False, 17 | add_ee_embd: bool = False, 18 | ee_embd_dim: int, 19 | pointnet_output_dim: int, 20 | pointnet_hidden_dim: int, 21 | pointnet_hidden_depth: int, 22 | pointnet_activation: str = "gelu", 23 | prop_input_dim: int, 24 | robot_policy_output_dim: int, 25 | include_robot_policy_gripper_action_input: bool, 26 | robot_policy_gripper_action_embd_dim: int, 27 | feature_fusion_hidden_depth: int = 1, 28 | feature_fusion_hidden_dim: int = 256, 29 | feature_fusion_output_dim: int = 256, 30 | feature_fusion_activation: str = "relu", 31 | feature_fusion_add_input_activation: bool = False, 32 | feature_fusion_add_output_activation: bool = False, 33 | action_dim: int, 34 | action_net_gmm_n_modes: int = 5, 35 | action_net_hidden_dim: int, 36 | action_net_hidden_depth: int, 37 | action_net_activation: str = "relu", 38 | intervention_head_hidden_dim: int, 39 | intervention_head_hidden_depth: int, 40 | intervention_head_activation: str = "relu", 41 | deterministic_inference: bool = True, 42 | gmm_low_noise_eval: bool = True, 43 | update_intervention_head_only: bool = False, 44 | ckpt_path_if_update_intervention_head_only: str = None, 45 | ): 46 | super().__init__() 47 | 48 | extractors = { 49 | "pcd": PointNet( 50 | n_coordinates=point_channels, 51 | add_ee_embd=add_ee_embd, 52 | ee_embd_dim=ee_embd_dim, 53 | output_dim=pointnet_output_dim, 54 | hidden_dim=pointnet_hidden_dim, 55 | hidden_depth=pointnet_hidden_depth, 56 | activation=pointnet_activation, 57 | subtract_mean=subtract_point_mean, 58 | ), 59 | "proprioception": Identity(prop_input_dim), 60 | "robot_policy_action": Identity(robot_policy_output_dim), 61 | } 62 | if include_robot_policy_gripper_action_input: 63 | extractors["robot_policy_gripper_action"] = Embedding( 64 | num_embeddings=2, # open/close 65 | embedding_dim=robot_policy_gripper_action_embd_dim, 66 | ) 67 | self.feature_extractor = SimpleFeatureFusion( 68 | extractors=extractors, 69 | hidden_depth=feature_fusion_hidden_depth, 70 | hidden_dim=feature_fusion_hidden_dim, 71 | output_dim=feature_fusion_output_dim, 72 | activation=feature_fusion_activation, 73 | add_input_activation=feature_fusion_add_input_activation, 74 | 
add_output_activation=feature_fusion_add_output_activation, 75 | ) 76 | 77 | self.action_net = GMMHead( 78 | feature_fusion_output_dim, 79 | n_modes=action_net_gmm_n_modes, 80 | action_dim=action_dim, 81 | hidden_dim=action_net_hidden_dim, 82 | hidden_depth=action_net_hidden_depth, 83 | activation=action_net_activation, 84 | low_noise_eval=gmm_low_noise_eval, 85 | ) 86 | self.intervention_head = CategoricalNet( 87 | feature_fusion_output_dim, 88 | action_dim=2, # intervention or not 89 | hidden_dim=intervention_head_hidden_dim, 90 | hidden_depth=intervention_head_hidden_depth, 91 | activation=intervention_head_activation, 92 | ) 93 | if update_intervention_head_only: 94 | assert os.path.exists(ckpt_path_if_update_intervention_head_only) 95 | ckpt = torch.load( 96 | ckpt_path_if_update_intervention_head_only, map_location="cpu" 97 | ) 98 | 99 | feature_extractor_weighs = { 100 | k: v 101 | for k, v in ckpt["state_dict"].items() 102 | if k.startswith("residual_policy.feature_extractor") 103 | } 104 | load_state_dict( 105 | self.feature_extractor, 106 | feature_extractor_weighs, 107 | strip_prefix="residual_policy.feature_extractor.", 108 | strict=True, 109 | ) 110 | freeze_params(self.feature_extractor) 111 | 112 | action_net_weights = { 113 | k: v 114 | for k, v in ckpt["state_dict"].items() 115 | if k.startswith("residual_policy.action_net") 116 | } 117 | load_state_dict( 118 | self.action_net, 119 | action_net_weights, 120 | strip_prefix="residual_policy.action_net.", 121 | strict=True, 122 | ) 123 | freeze_params(self.action_net) 124 | 125 | self._deterministic_inference = deterministic_inference 126 | 127 | def forward(self, obs): 128 | feature = self.feature_extractor(obs) 129 | action_dist = self.action_net(feature) 130 | intervention_dist = self.intervention_head(feature) 131 | return action_dist, intervention_dist 132 | 133 | @torch.no_grad() 134 | def act(self, obs, deterministic=None): 135 | action_dist, intervention_dist = self.forward(obs) 136 | if deterministic is None: 137 | deterministic = self._deterministic_inference 138 | if deterministic: 139 | return action_dist.mode(), intervention_dist.mode() 140 | else: 141 | return action_dist.sample(), intervention_dist.sample() 142 | -------------------------------------------------------------------------------- /transic/rl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/rl/__init__.py -------------------------------------------------------------------------------- /transic/rl/models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | import torch 4 | from gym import spaces 5 | import rl_games.common.divergence as divergence 6 | from rl_games.common.extensions.distributions import CategoricalMasked 7 | from rl_games.algos_torch.running_mean_std import RunningMeanStd 8 | 9 | from transic.rl.moving_avg import RunningMeanStdObs 10 | 11 | 12 | class BaseModel: 13 | def __init__(self, model_class): 14 | self.model_class = model_class 15 | 16 | def is_rnn(self): 17 | return False 18 | 19 | def is_separate_critic(self): 20 | return False 21 | 22 | def get_value_layer(self): 23 | return None 24 | 25 | def build(self, config): 26 | obs_shape = config["input_shape"] 27 | normalize_value = config.get("normalize_value", False) 28 | normalize_input = config.get("normalize_input", False) 29 | 
normalize_input_excluded_keys = config.get( 30 | "normalize_input_excluded_keys", None 31 | ) 32 | value_size = config.get("value_size", 1) 33 | return self.Network( 34 | self.network_builder.build(self.model_class, **config), 35 | obs_shape=obs_shape, 36 | normalize_value=normalize_value, 37 | normalize_input=normalize_input, 38 | value_size=value_size, 39 | normalize_input_excluded_keys=normalize_input_excluded_keys, 40 | ) 41 | 42 | 43 | class BaseModelNetwork(nn.Module): 44 | def __init__( 45 | self, 46 | obs_shape, 47 | normalize_value, 48 | normalize_input, 49 | value_size, 50 | normalize_input_excluded_keys=None, 51 | ): 52 | nn.Module.__init__(self) 53 | self.obs_shape = obs_shape 54 | self.normalize_value = normalize_value 55 | self.normalize_input = normalize_input 56 | self.value_size = value_size 57 | 58 | if normalize_value: 59 | self.value_mean_std = RunningMeanStd( 60 | (self.value_size,) 61 | ) # GeneralizedMovingStats((self.value_size,)) # 62 | if normalize_input: 63 | if isinstance(obs_shape, spaces.Dict): 64 | self.running_mean_std = RunningMeanStdObs( 65 | obs_shape, exclude_keys=normalize_input_excluded_keys 66 | ) 67 | else: 68 | self.running_mean_std = RunningMeanStd(obs_shape) 69 | 70 | def norm_obs(self, observation): 71 | with torch.no_grad(): 72 | return ( 73 | self.running_mean_std(observation) 74 | if self.normalize_input 75 | else observation 76 | ) 77 | 78 | def denorm_value(self, value): 79 | with torch.no_grad(): 80 | return ( 81 | self.value_mean_std(value, denorm=True) 82 | if self.normalize_value 83 | else value 84 | ) 85 | 86 | 87 | class ModelA2C(BaseModel): 88 | def __init__(self, network): 89 | BaseModel.__init__(self, "a2c") 90 | self.network_builder = network 91 | 92 | class Network(BaseModelNetwork): 93 | def __init__(self, a2c_network, **kwargs): 94 | BaseModelNetwork.__init__(self, **kwargs) 95 | self.a2c_network = a2c_network 96 | 97 | def is_rnn(self): 98 | return self.a2c_network.is_rnn() 99 | 100 | def get_default_rnn_state(self): 101 | return self.a2c_network.get_default_rnn_state() 102 | 103 | def get_value_layer(self): 104 | return self.a2c_network.get_value_layer() 105 | 106 | def kl(self, p_dict, q_dict): 107 | p = p_dict["logits"] 108 | q = q_dict["logits"] 109 | return divergence.d_kl_discrete(p, q) 110 | 111 | def forward(self, input_dict): 112 | is_train = input_dict.get("is_train", True) 113 | action_masks = input_dict.get("action_masks", None) 114 | prev_actions = input_dict.get("prev_actions", None) 115 | input_dict["obs"] = self.norm_obs(input_dict["obs"]) 116 | logits, value, states = self.a2c_network(input_dict) 117 | 118 | if is_train: 119 | categorical = CategoricalMasked(logits=logits, masks=action_masks) 120 | prev_neglogp = -categorical.log_prob(prev_actions) 121 | entropy = categorical.entropy() 122 | result = { 123 | "prev_neglogp": torch.squeeze(prev_neglogp), 124 | "logits": categorical.logits, 125 | "values": value, 126 | "entropy": entropy, 127 | "rnn_states": states, 128 | } 129 | return result 130 | else: 131 | categorical = CategoricalMasked(logits=logits, masks=action_masks) 132 | selected_action = categorical.sample().long() 133 | neglogp = -categorical.log_prob(selected_action) 134 | result = { 135 | "neglogpacs": torch.squeeze(neglogp), 136 | "values": self.denorm_value(value), 137 | "actions": selected_action, 138 | "logits": categorical.logits, 139 | "rnn_states": states, 140 | } 141 | return result 142 | 143 | 144 | class ModelA2CContinuousLogStd(BaseModel): 145 | def __init__(self, network): 146 | 
BaseModel.__init__(self, "a2c") 147 | self.network_builder = network 148 | 149 | class Network(BaseModelNetwork): 150 | def __init__(self, a2c_network, **kwargs): 151 | BaseModelNetwork.__init__(self, **kwargs) 152 | self.a2c_network = a2c_network 153 | 154 | def is_rnn(self): 155 | return self.a2c_network.is_rnn() 156 | 157 | def get_value_layer(self): 158 | return self.a2c_network.get_value_layer() 159 | 160 | def get_default_rnn_state(self): 161 | return self.a2c_network.get_default_rnn_state() 162 | 163 | def forward(self, input_dict): 164 | is_train = input_dict.get("is_train", True) 165 | prev_actions = input_dict.get("prev_actions", None) 166 | input_dict["obs"] = self.norm_obs(input_dict["obs"]) 167 | mu, logstd, value, states = self.a2c_network(input_dict) 168 | sigma = torch.exp(logstd) 169 | distr = torch.distributions.Normal(mu, sigma, validate_args=False) 170 | if is_train: 171 | entropy = distr.entropy().sum(dim=-1) 172 | prev_neglogp = self.neglogp(prev_actions, mu, sigma, logstd) 173 | result = { 174 | "prev_neglogp": torch.squeeze(prev_neglogp), 175 | "values": value, 176 | "entropy": entropy, 177 | "rnn_states": states, 178 | "mus": mu, 179 | "sigmas": sigma, 180 | } 181 | return result 182 | else: 183 | selected_action = distr.sample() 184 | neglogp = self.neglogp(selected_action, mu, sigma, logstd) 185 | result = { 186 | "neglogpacs": torch.squeeze(neglogp), 187 | "values": self.denorm_value(value), 188 | "actions": selected_action, 189 | "rnn_states": states, 190 | "mus": mu, 191 | "sigmas": sigma, 192 | } 193 | return result 194 | 195 | def neglogp(self, x, mean, std, logstd): 196 | return ( 197 | 0.5 * (((x - mean) / std) ** 2).sum(dim=-1) 198 | + 0.5 * np.log(2.0 * np.pi) * x.size()[-1] 199 | + logstd.sum(dim=-1) 200 | ) 201 | -------------------------------------------------------------------------------- /transic/rl/moving_avg.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import torch.nn as nn 4 | from rl_games.algos_torch.running_mean_std import RunningMeanStd 5 | from gym import spaces 6 | 7 | 8 | class RunningMeanStdObs(nn.Module): 9 | def __init__( 10 | self, 11 | insize, 12 | epsilon=1e-05, 13 | per_channel=False, 14 | norm_only=False, 15 | exclude_keys: list | None = None, 16 | ): 17 | assert isinstance(insize, spaces.Dict) 18 | exclude_keys = exclude_keys or [] 19 | super(RunningMeanStdObs, self).__init__() 20 | self.running_mean_std = nn.ModuleDict( 21 | { 22 | k: RunningMeanStd(v.shape, epsilon, per_channel, norm_only) 23 | for k, v in insize.items() 24 | if k not in exclude_keys 25 | } 26 | ) 27 | self._exclude_keys = exclude_keys 28 | 29 | def forward(self, input, denorm=False): 30 | res = { 31 | k: self.running_mean_std[k](v, denorm) if k not in self._exclude_keys else v 32 | for k, v in input.items() 33 | } 34 | return res 35 | -------------------------------------------------------------------------------- /transic/rl/runner.py: -------------------------------------------------------------------------------- 1 | from rl_games.torch_runner import Runner as _Runner, _override_sigma 2 | 3 | from transic.rl.agent import PPOAgent 4 | from transic.rl.player import MyPPOPlayerContinuous as PPOPlayer 5 | 6 | 7 | def _restore(agent, args, is_train_restore: bool): 8 | if ( 9 | "checkpoint" in args 10 | and args["checkpoint"] is not None 11 | and args["checkpoint"] != "" 12 | ): 13 | set_epoch = args.get("from_ckpt_epoch", True) 14 | if is_train_restore: 15 | 
agent.restore(args["checkpoint"], set_epoch) 16 | else: 17 | agent.restore(args["checkpoint"]) 18 | 19 | 20 | class Runner(_Runner): 21 | def __init__(self, algo_observer=None): 22 | super().__init__(algo_observer) 23 | self.algo_factory.register_builder("ppo", lambda **kwargs: PPOAgent(**kwargs)) 24 | self.player_factory.register_builder( 25 | "ppo", lambda **kwargs: PPOPlayer(**kwargs) 26 | ) 27 | 28 | def run_train(self, args): 29 | print("Started to train") 30 | agent = self.algo_factory.create( 31 | self.algo_name, base_name="run", params=self.params 32 | ) 33 | _restore(agent, args, is_train_restore=True) 34 | _override_sigma(agent, args) 35 | agent.train() 36 | 37 | def run_play(self, args): 38 | print("Started to play") 39 | save_rollouts_cfg = args.get("save_rollouts", {}) 40 | player = self.create_player(save_rollouts_cfg) 41 | _restore(player, args, is_train_restore=False) 42 | _override_sigma(player, args) 43 | player.run() 44 | 45 | def create_player(self, save_rollouts_cfg): 46 | return self.player_factory.create( 47 | self.algo_name, params=self.params, **save_rollouts_cfg 48 | ) 49 | -------------------------------------------------------------------------------- /transic/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/utils/__init__.py -------------------------------------------------------------------------------- /transic/utils/config_utils.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from omegaconf import OmegaConf 3 | 4 | 5 | def is_sequence(obj): 6 | """ 7 | Returns: 8 | True if the sequence is a collections.Sequence and not a string. 
9 | """ 10 | return isinstance(obj, collections.abc.Sequence) and not isinstance(obj, str) 11 | 12 | 13 | def is_mapping(obj): 14 | """ 15 | Returns: 16 | True if the sequence is a collections.Mapping 17 | """ 18 | return isinstance(obj, collections.abc.Mapping) 19 | 20 | 21 | def omegaconf_to_dict(cfg, resolve: bool = True, enum_to_str: bool = False): 22 | """ 23 | Convert arbitrary nested omegaconf objects to primitive containers 24 | 25 | WARNING: cannot use tree lib because it gets confused on DictConfig and ListConfig 26 | """ 27 | kw = dict(resolve=resolve, enum_to_str=enum_to_str) 28 | if OmegaConf.is_config(cfg): 29 | return OmegaConf.to_container(cfg, **kw) 30 | elif is_sequence(cfg): 31 | return type(cfg)(omegaconf_to_dict(c, **kw) for c in cfg) 32 | elif is_mapping(cfg): 33 | return {k: omegaconf_to_dict(c, **kw) for k, c in cfg.items()} 34 | else: 35 | return cfg 36 | -------------------------------------------------------------------------------- /transic/utils/misc_utils.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Callable, Literal 2 | 3 | import fnmatch 4 | 5 | 6 | def _match_patterns_helper(element, patterns): 7 | for p in patterns: 8 | if callable(p) and p(element): 9 | return True 10 | if fnmatch.fnmatch(element, p): 11 | return True 12 | return False 13 | 14 | 15 | def match_patterns( 16 | item: str, 17 | include: Union[str, List[str], Callable, List[Callable], None] = None, 18 | exclude: Union[str, List[str], Callable, List[Callable], None] = None, 19 | *, 20 | precedence: Literal["include", "exclude"] = "exclude", 21 | ): 22 | """ 23 | Args: 24 | include: None to disable `include` filter and delegate to exclude 25 | precedence: "include" or "exclude" 26 | """ 27 | assert precedence in ["include", "exclude"] 28 | if exclude is None: 29 | exclude = [] 30 | if isinstance(exclude, (str, Callable)): 31 | exclude = [exclude] 32 | if isinstance(include, (str, Callable)): 33 | include = [include] 34 | if include is None: 35 | # exclude is the sole veto vote 36 | return not _match_patterns_helper(item, exclude) 37 | 38 | if precedence == "include": 39 | return _match_patterns_helper(item, include) 40 | else: 41 | if _match_patterns_helper(item, exclude): 42 | return False 43 | else: 44 | return _match_patterns_helper(item, include) 45 | -------------------------------------------------------------------------------- /transic/utils/reformat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2023, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 
/transic/utils/reformat.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2018-2023, NVIDIA Corporation
2 | # All rights reserved.
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 
29 | from omegaconf import DictConfig, OmegaConf
30 | from typing import Dict
31 | 
32 | 
33 | def omegaconf_to_dict(d: DictConfig) -> Dict:
34 |     """Converts an omegaconf DictConfig to a python Dict, respecting variable interpolation."""
35 |     ret = {}
36 |     for k, v in d.items():
37 |         if isinstance(v, DictConfig):
38 |             ret[k] = omegaconf_to_dict(v)
39 |         else:
40 |             ret[k] = v
41 |     return ret
42 | 
43 | 
44 | def print_dict(val, nesting: int = -4, start: bool = True):
45 |     """Outputs a nested dictionary."""
46 |     if type(val) == dict:
47 |         if not start:
48 |             print("")
49 |         nesting += 4
50 |         for k in val:
51 |             print(nesting * " ", end="")
52 |             print(k, end=": ")
53 |             print_dict(val[k], nesting, start=False)
54 |     else:
55 |         print(val)
56 | 
57 | 
58 | # EOF
59 | 
--------------------------------------------------------------------------------
/transic/utils/torch_utils.py:
--------------------------------------------------------------------------------
1 | import tree
2 | import torch
3 | import torch.nn as nn
4 | 
5 | from transic.utils.tree_utils import tree_value_at_path
6 | 
7 | 
8 | def load_state_dict(objects, states, strip_prefix=None, strict=False):
9 |     """
10 |     Args:
11 |         strict: objects and states must match exactly
12 |         strip_prefix: only match the keys that have the prefix, and strip it
13 |     """
14 | 
15 |     def _load(paths, obj):
16 |         if not _implements_method(obj, "load_state_dict"):
17 |             raise ValueError(
18 |                 f"Object {type(obj)} does not support load_state_dict() method"
19 |             )
20 |         try:
21 |             state = tree_value_at_path(states, paths)
22 |         except ValueError:  # paths do not exist in `states` structure
23 |             if strict:
24 |                 raise
25 |             else:
26 |                 return
27 |         if strip_prefix:
28 |             assert isinstance(strip_prefix, str)
29 |             state = {
30 |                 k[len(strip_prefix) :]: v
31 |                 for k, v in state.items()
32 |                 if k.startswith(strip_prefix)
33 |             }
34 |         if isinstance(obj, nn.Module):
35 |             return obj.load_state_dict(state, strict=strict)
36 |         else:
37 |             return obj.load_state_dict(state)
38 | 
39 |     return tree.map_structure_with_path(_load, objects)
40 | 
41 | 
42 | def _implements_method(object, method: str):
43 |     """
44 |     Returns:
45 |         True if object implements a method
46 |     """
47 |     return hasattr(object, method) and callable(getattr(object, method))
48 | 
49 | 
50 | def set_requires_grad(model, requires_grad):
51 |     if torch.is_tensor(model):
52 |         model.requires_grad = requires_grad
53 |     else:
54 |         for param in model.parameters():
55 |             param.requires_grad = requires_grad
56 | 
57 | 
58 | def freeze_params(model):
59 |     set_requires_grad(model, False)
60 |     if not torch.is_tensor(model):
61 |         model.eval()
62 | 
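--------------------------------------------------------------------------------
This prefix-stripping loader is what lets PointNetResidualPolicy (earlier in
this dump) pull only the feature-extractor and action-head weights out of a
full training checkpoint. A small self-contained sketch, with the module and
key names invented for illustration:

```python
import torch
import torch.nn as nn

from transic.utils.torch_utils import load_state_dict, freeze_params

net = nn.Linear(4, 2)

# Pretend this came from a larger checkpoint: relevant keys carry a
# "policy.net." prefix; unrelated keys are dropped by the prefix filter.
ckpt_state = {
    "policy.net.weight": torch.zeros(2, 4),
    "policy.net.bias": torch.zeros(2),
    "policy.head.weight": torch.ones(3, 2),  # filtered out by strip_prefix
}

load_state_dict(net, ckpt_state, strip_prefix="policy.net.", strict=True)
freeze_params(net)  # requires_grad=False on all params, module switched to eval()

assert not any(p.requires_grad for p in net.parameters())
```
--------------------------------------------------------------------------------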
/transic/utils/tree_utils.py:
--------------------------------------------------------------------------------
1 | from typing import List, TypeVar, Iterable, Tuple
2 | import numpy as np
3 | 
4 | try:
5 |     import tree
6 | 
7 | except ImportError:
8 |     raise ImportError("Please install dm_tree first: `pip install dm_tree`")
9 | 
10 | ElementType = TypeVar("ElementType")
11 | 
12 | 
13 | def fast_map_structure(func, *structure):
14 |     """Faster map_structure implementation which skips some error checking."""
15 |     flat_structure = (tree.flatten(s) for s in structure)
16 |     entries = zip(*flat_structure)
17 |     # Arbitrarily choose one of the structures of the original sequence (the last)
18 |     # to match the structure for the flattened sequence.
19 |     return tree.unflatten_as(structure[-1], [func(*x) for x in entries])
20 | 
21 | 
22 | def stack_sequence_fields(sequence: Iterable[ElementType]) -> ElementType:
23 |     """Stacks a list of identically nested objects.
24 | 
25 |     This takes a sequence of identically nested objects and returns a single
26 |     nested object whose ith leaf is a stacked numpy array of the corresponding
27 |     ith leaf from each element of the sequence.
28 | 
29 |     For example, if `sequence` is:
30 | 
31 |     ```python
32 |     [{
33 |         'action': np.array([1.0]),
34 |         'observation': (np.array([0.0, 1.0, 2.0]),),
35 |         'reward': 1.0
36 |     }, {
37 |         'action': np.array([0.5]),
38 |         'observation': (np.array([1.0, 2.0, 3.0]),),
39 |         'reward': 0.0
40 |     }, {
41 |         'action': np.array([0.3]),
42 |         'observation': (np.array([2.0, 3.0, 4.0]),),
43 |         'reward': 0.5
44 |     }]
45 |     ```
46 | 
47 |     Then this function will return:
48 | 
49 |     ```python
50 |     {
51 |         'action': np.array([....])        # array shape = [3 x 1]
52 |         'observation': (np.array([...]),) # array shape = [3 x 3]
53 |         'reward': np.array([...])         # array shape = [3]
54 |     }
55 |     ```
56 | 
57 |     Note that the 'observation' entry in the above example has two levels of
58 |     nesting, i.e. it is a tuple of arrays.
59 | 
60 |     Args:
61 |         sequence: a list of identically nested objects.
62 | 
63 |     Returns:
64 |         A nested object with stacked numpy arrays at the leaves.
65 | 
66 |     Raises:
67 |         ValueError: If `sequence` is an empty sequence.
68 |     """
69 |     # Handle empty input sequences.
70 |     if not sequence:
71 |         raise ValueError("Input sequence must not be empty")
72 | 
73 |     # Default to asarray when arrays don't have the same shape to be compatible
74 |     # with old behaviour.
75 |     try:
76 |         return fast_map_structure(lambda *values: np.stack(values), *sequence)
77 |     except ValueError:
78 |         return fast_map_structure(lambda *values: np.asarray(values), *sequence)
79 | 
80 | 
81 | def unstack_sequence_fields(struct: ElementType, batch_size: int) -> List[ElementType]:
82 |     """Converts a struct of batched arrays to a list of structs.
83 | 
84 |     This is effectively the inverse of `stack_sequence_fields`.
85 | 
86 |     Args:
87 |         struct: An (arbitrarily nested) structure of arrays.
88 |         batch_size: The length of the leading dimension of each array in the struct.
89 |             This is assumed to be static and known.
90 | 
91 |     Returns:
92 |         A list of structs with the same structure as `struct`, where each leaf node
93 |         is an unbatched element of the original leaf node.
94 | """ 95 | 96 | return [tree.map_structure(lambda s, i=i: s[i], struct) for i in range(batch_size)] 97 | 98 | 99 | def tree_value_at_path(obj, paths: Tuple): 100 | try: 101 | for p in paths: 102 | obj = obj[p] 103 | return obj 104 | except Exception as e: 105 | raise ValueError(f"{e}\n\n-- Incorrect nested path {paths} for object: {obj}.") 106 | -------------------------------------------------------------------------------- /transic/utils/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2023, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 
29 | # python
30 | # import pwd
31 | import getpass
32 | import tempfile
33 | import time
34 | from collections import OrderedDict
35 | from os.path import join
36 | 
37 | import numpy as np
38 | import torch
39 | import random
40 | import os
41 | 
42 | 
43 | def retry(times, exceptions):
44 |     """
45 |     Retry Decorator https://stackoverflow.com/a/64030200/1645784
46 |     Retries the wrapped function/method `times` times if the exceptions listed
47 |     in ``exceptions`` are thrown
48 |     :param times: The number of times to repeat the wrapped function/method
49 |     :type times: Int
50 |     :param exceptions: Lists of exceptions that trigger a retry attempt
51 |     :type exceptions: Tuple of Exceptions
52 |     """
53 | 
54 |     def decorator(func):
55 |         def newfn(*args, **kwargs):
56 |             attempt = 0
57 |             while attempt < times:
58 |                 try:
59 |                     return func(*args, **kwargs)
60 |                 except exceptions:
61 |                     print(
62 |                         f"Exception thrown when attempting to run {func}, attempt {attempt} out of {times}"
63 |                     )
64 |                     time.sleep(min(2**attempt, 30))
65 |                     attempt += 1
66 | 
67 |             return func(*args, **kwargs)
68 | 
69 |         return newfn
70 | 
71 |     return decorator
72 | 
73 | 
74 | def flatten_dict(d, prefix="", separator="."):
75 |     res = dict()
76 |     for key, value in d.items():
77 |         if isinstance(value, (dict, OrderedDict)):
78 |             res.update(flatten_dict(value, prefix + key + separator, separator))
79 |         else:
80 |             res[prefix + key] = value
81 | 
82 |     return res
83 | 
84 | 
85 | def set_np_formatting():
86 |     """formats numpy print"""
87 |     np.set_printoptions(
88 |         edgeitems=30,
89 |         infstr="inf",
90 |         linewidth=4000,
91 |         nanstr="nan",
92 |         precision=2,
93 |         suppress=False,
94 |         threshold=10000,
95 |         formatter=None,
96 |     )
97 | 
98 | 
99 | def set_seed(seed, torch_deterministic=False, rank=0):
100 |     """set seed across modules"""
101 |     if seed == -1 and torch_deterministic:
102 |         seed = 42 + rank
103 |     elif seed == -1:
104 |         seed = np.random.randint(0, 10000)
105 |     else:
106 |         seed = seed + rank
107 | 
108 |     print("Setting seed: {}".format(seed))
109 | 
110 |     random.seed(seed)
111 |     np.random.seed(seed)
112 |     torch.manual_seed(seed)
113 |     os.environ["PYTHONHASHSEED"] = str(seed)
114 |     torch.cuda.manual_seed(seed)
115 |     torch.cuda.manual_seed_all(seed)
116 | 
117 |     if torch_deterministic:
118 |         # refer to https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
119 |         os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
120 |         torch.backends.cudnn.benchmark = False
121 |         torch.backends.cudnn.deterministic = True
122 |         torch.use_deterministic_algorithms(True)
123 |     else:
124 |         torch.backends.cudnn.benchmark = True
125 |         torch.backends.cudnn.deterministic = False
126 | 
127 |     return seed
128 | 
129 | 
130 | def nested_dict_set_attr(d, key, val):
131 |     pre, _, post = key.partition(".")
132 |     if post:
133 |         nested_dict_set_attr(d[pre], post, val)
134 |     else:
135 |         d[key] = val
136 | 
137 | 
138 | def nested_dict_get_attr(d, key):
139 |     pre, _, post = key.partition(".")
140 |     if post:
141 |         return nested_dict_get_attr(d[pre], post)
142 |     else:
143 |         return d[key]
144 | 
145 | 
146 | def ensure_dir_exists(path):
147 |     if not os.path.exists(path):
148 |         os.makedirs(path)
149 |     return path
150 | 
151 | 
152 | def safe_ensure_dir_exists(path):
153 |     """Should be safer in multi-threaded environment."""
154 |     try:
155 |         return ensure_dir_exists(path)
156 |     except FileExistsError:
157 |         return path
158 | 
159 | 
160 | def get_username():
161 |     uid = os.getuid()
162 |     try:
163 |         return getpass.getuser()
164 |     except KeyError:
165 |         # worst case scenario - let's
just use uid 166 | return str(uid) 167 | 168 | 169 | def project_tmp_dir(): 170 | tmp_dir_name = f"ige_{get_username()}" 171 | return safe_ensure_dir_exists(join(tempfile.gettempdir(), tmp_dir_name)) 172 | 173 | 174 | # EOF 175 | -------------------------------------------------------------------------------- /transic/utils/wandb_utils.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import torch 3 | import wandb 4 | from rl_games.common.algo_observer import AlgoObserver 5 | 6 | from transic.utils.utils import retry 7 | from transic.utils.reformat import omegaconf_to_dict 8 | 9 | 10 | class WandbAlgoObserver(AlgoObserver): 11 | """Need this to propagate the correct experiment name after initialization.""" 12 | 13 | def __init__(self, cfg): 14 | super().__init__() 15 | self.cfg = cfg 16 | 17 | def before_init(self, base_name, config, experiment_name): 18 | """ 19 | Must call initialization of Wandb before RL-games summary writer is initialized, otherwise 20 | sync_tensorboard does not work. 21 | """ 22 | 23 | import wandb 24 | 25 | wandb_unique_id = f"uid_{experiment_name}" 26 | print(f"Wandb using unique id {wandb_unique_id}") 27 | 28 | cfg = self.cfg 29 | 30 | # this can fail occasionally, so we try a couple more times 31 | @retry(3, exceptions=(Exception,)) 32 | def init_wandb(): 33 | wandb.init( 34 | project=cfg.wandb_project, 35 | entity=cfg.wandb_entity, 36 | group=cfg.wandb_group, 37 | tags=cfg.wandb_tags, 38 | sync_tensorboard=True, 39 | id=wandb_unique_id, 40 | name=experiment_name, 41 | resume=True, 42 | settings=wandb.Settings(start_method="fork"), 43 | ) 44 | 45 | if cfg.wandb_logcode_dir: 46 | wandb.run.log_code(root=cfg.wandb_logcode_dir) 47 | print("wandb running directory........", wandb.run.dir) 48 | 49 | print("Initializing WandB...") 50 | try: 51 | init_wandb() 52 | except Exception as exc: 53 | print(f"Could not initialize WandB! 
{exc}") 54 | 55 | if isinstance(self.cfg, dict): 56 | wandb.config.update(self.cfg, allow_val_change=True) 57 | else: 58 | wandb.config.update(omegaconf_to_dict(self.cfg), allow_val_change=True) 59 | 60 | 61 | class WandbVideoCaptureWrapper(gym.Wrapper): 62 | def __init__( 63 | self, 64 | env, 65 | n_parallel_recorders: int = 1, 66 | n_successful_videos_to_record: int = 50, 67 | ): 68 | super().__init__(env) 69 | n_parallel_recorders = min(n_parallel_recorders, env.num_envs) 70 | self._n_recorders = n_parallel_recorders 71 | self._videos = [[] for _ in range(n_parallel_recorders)] 72 | self._rcd_idxs = [ 73 | i 74 | for i in range(env.num_envs) 75 | if i % (env.num_envs // n_parallel_recorders) == 0 76 | ][:n_parallel_recorders] 77 | self._n_video_saved = 0 78 | self._n_successful_video_saved = 0 79 | self._n_successful_videos_to_record = n_successful_videos_to_record 80 | 81 | def reset(self, **kwargs): 82 | self._videos = [[] for _ in range(self._n_recorders)] 83 | return super().reset(**kwargs) 84 | 85 | def step(self, action): 86 | obs, reward, done, info = super().step(action) 87 | for i, idx in enumerate(self._rcd_idxs): 88 | self._videos[i].append(self.env.camera_obs[idx].clone()) 89 | if torch.any(done): 90 | for i, idx in enumerate(self._rcd_idxs): 91 | if done[idx]: 92 | video = torch.stack(self._videos[i])[ 93 | ..., :-1 94 | ] # (T, H, W, C), RGBA -> RGB 95 | video = video.to(dtype=torch.uint8) 96 | video = ( 97 | video.permute(0, 3, 1, 2).detach().cpu().numpy() 98 | ) # (T, C, H, W) 99 | video = wandb.Video(video, fps=10, format="mp4") 100 | succeeded = self.env.success_buf 101 | failed = self.env.failure_buf 102 | status = "timeout" 103 | if succeeded[idx]: 104 | status = "success" 105 | self._n_successful_video_saved += 1 106 | elif failed[idx]: 107 | status = "failure" 108 | wandb.log( 109 | {f"test_video/video-{self._n_video_saved}_{status}": video} 110 | ) 111 | self._n_video_saved += 1 112 | self._videos[i] = [] 113 | if ( 114 | self._n_successful_video_saved 115 | >= self._n_successful_videos_to_record 116 | ): 117 | exit() 118 | return obs, reward, done, info 119 | --------------------------------------------------------------------------------