├── .gitignore
├── LICENSE
├── README.md
├── main
│   ├── cfg
│   │   ├── config.yaml
│   │   ├── distillation_config.yaml
│   │   ├── distillation_student_arch
│   │   │   ├── pointnet.yaml
│   │   │   └── rnn_pointnet.yaml
│   │   ├── residual_config.yaml
│   │   ├── residual_policy_arch
│   │   │   ├── perceiver.yaml
│   │   │   └── pointnet.yaml
│   │   ├── residual_policy_task
│   │   │   ├── default.yaml
│   │   │   └── insert.yaml
│   │   ├── rl_train
│   │   │   ├── InsertFullPPO.yaml
│   │   │   ├── InsertSinglePPO.yaml
│   │   │   ├── LiftLeanedLegPPO.yaml
│   │   │   ├── ReachAndGraspFullPPO.yaml
│   │   │   ├── ReachAndGraspSinglePPO.yaml
│   │   │   ├── ScrewFullPPO.yaml
│   │   │   ├── ScrewSinglePPO.yaml
│   │   │   └── StabilizePPO.yaml
│   │   └── task
│   │       ├── InsertFull.yaml
│   │       ├── InsertFullPCD.yaml
│   │       ├── InsertSingle.yaml
│   │       ├── InsertSinglePCD.yaml
│   │       ├── LiftLeanedLeg.yaml
│   │       ├── LiftLeanedLegPCD.yaml
│   │       ├── ReachAndGraspFull.yaml
│   │       ├── ReachAndGraspFullPCD.yaml
│   │       ├── ReachAndGraspSingle.yaml
│   │       ├── ReachAndGraspSinglePCD.yaml
│   │       ├── ScrewFull.yaml
│   │       ├── ScrewFullPCD.yaml
│   │       ├── ScrewSingle.yaml
│   │       ├── ScrewSinglePCD.yaml
│   │       ├── Stabilize.yaml
│   │       └── StabilizePCD.yaml
│   ├── correction_data_collection.py
│   ├── distillation
│   │   ├── test.py
│   │   └── train.py
│   ├── integrated_deployment.py
│   ├── residual
│   │   └── train.py
│   └── rl
│       └── train.py
├── media
│   ├── SUSig-red.png
│   └── method_overview.gif
├── requirements.txt
├── setup.py
└── transic
    ├── __init__.py
    ├── distillation
    │   ├── __init__.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   ├── collate.py
    │   │   ├── data_module.py
    │   │   ├── dataset.py
    │   │   └── dummy.py
    │   ├── module.py
    │   └── policy
    │       ├── __init__.py
    │       ├── pointnet_policy.py
    │       └── rnn_pointnet_policy.py
    ├── learn
    │   ├── __init__.py
    │   ├── lightning.py
    │   ├── lr_schedule.py
    │   ├── optimizer_group.py
    │   └── policy
    │       ├── __init__.py
    │       ├── base.py
    │       └── distributions.py
    ├── nn
    │   ├── __init__.py
    │   ├── features
    │   │   ├── __init__.py
    │   │   ├── embedding.py
    │   │   ├── fusion.py
    │   │   ├── identity.py
    │   │   └── pointcloud
    │   │       ├── __init__.py
    │   │       ├── pointnet.py
    │   │       └── set_transformer
    │   │           ├── __init__.py
    │   │           ├── set_transformer.py
    │   │           └── set_xf_pcd_encoder.py
    │   └── mlp.py
    ├── real_world
    │   ├── __init__.py
    │   └── obs.py
    ├── residual
    │   ├── __init__.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   ├── collate.py
    │   │   ├── data_module.py
    │   │   └── dataset.py
    │   ├── module.py
    │   └── policy
    │       ├── __init__.py
    │       ├── perceiver_residual_policy.py
    │       └── pointnet_residual_policy.py
    ├── rl
    │   ├── __init__.py
    │   ├── agent.py
    │   ├── base.py
    │   ├── models.py
    │   ├── moving_avg.py
    │   ├── network_builder.py
    │   ├── player.py
    │   └── runner.py
    └── utils
        ├── __init__.py
        ├── array.py
        ├── config_utils.py
        ├── datadict.py
        ├── misc_utils.py
        ├── reformat.py
        ├── rlgames_utils.py
        ├── torch_utils.py
        ├── tree_utils.py
        ├── utils.py
        └── wandb_utils.py
/.gitignore: -------------------------------------------------------------------------------- 1 | *.ipynb 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | .static_storage/ 57 | .media/ 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | # env/ 89 | venv/ 90 | # ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | # Mac 108 | .DS_Store 109 | 110 | # MuJoCo License key 111 | mjkey.txt 112 | 113 | .mujocomanip_temp_model.xml 114 | 115 | # Python IDE 116 | .idea 117 | 118 | # Locally generated files 119 | dump.rdb 120 | *.local.ipynb 121 | runs/ 122 | temp* 123 | debug_* 124 | *.swp 125 | 126 | .tabnine_root 127 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Yunfan Jiang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /main/cfg/config.yaml: -------------------------------------------------------------------------------- 1 | # set default task and default training config based on task 2 | defaults: 3 | - task: ??? 4 | - rl_train: ${find_rl_train_config:${task}} 5 | - override hydra/job_logging: disabled 6 | - _self_ 7 | 8 | # Task name - used to pick the class to load 9 | task_name: ${task.name} 10 | # experiment name. 
defaults to name of training config 11 | experiment: '' 12 | 13 | # if set to positive integer, overrides the default number of environments 14 | num_envs: '' 15 | 16 | # seed - set to -1 to choose random seed 17 | seed: -1 18 | # set to True for deterministic performance 19 | torch_deterministic: False 20 | 21 | # set the maximum number of learning iterations to train for. overrides default per-environment setting 22 | max_iterations: 9999999999999 # train forever 23 | 24 | ## Device config 25 | # 'physx' or 'flex' 26 | physics_engine: 'physx' 27 | # whether to use cpu or gpu pipeline 28 | pipeline: 'gpu' 29 | # device for running physics simulation 30 | sim_device: 'cuda:0' 31 | # device to run RL 32 | rl_device: 'cuda:0' 33 | graphics_device_id: 0 34 | 35 | ## PhysX arguments 36 | num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only. 37 | solver_type: 1 # 0: pgs, 1: tgs 38 | num_subscenes: 4 # Splits the simulation into N physics scenes and runs each one in a separate thread 39 | 40 | # RLGames Arguments 41 | # test - if set, run policy in inference mode (requires setting checkpoint to load) 42 | test: False 43 | # save rollouts config, used for distillation 44 | save_rollouts: False 45 | save_successful_rollouts_only: True 46 | num_rollouts_to_save: 10000 47 | min_episode_length: 20 48 | # used to set checkpoint path 49 | checkpoint: '' 50 | from_ckpt_epoch: false 51 | # set sigma when restoring network 52 | sigma: '' 53 | # set to True to use multi-gpu training 54 | multi_gpu: False 55 | 56 | wandb_activate: False 57 | wandb_group: '' 58 | wandb_name: ${rl_train.params.config.name} 59 | wandb_entity: null # set to your wandb entity if using wandb 60 | wandb_project: null # set to your wandb project if using wandb 61 | wandb_tags: [] 62 | wandb_logcode_dir: '' 63 | 64 | capture_video: False 65 | n_parallel_recorders: 8 66 | n_successful_videos_to_record: 50 67 | display: False 68 | headless: True 69 | 70 | # set the directory where the output files get saved 71 | hydra: 72 | output_subdir: null 73 | run: 74 | dir: . 75 | -------------------------------------------------------------------------------- /main/cfg/distillation_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - config 3 | - _self_ # all below configs will override this conf.yaml 4 | - distillation_student_arch: ??? 5 | 6 | run_name: "${arch_name}_lr${lr}_wd${wd}_b${bs}" 7 | exp_root_dir: ??? 8 | 9 | arch_name: ??? # filled by distillation_student_arch 10 | 11 | # ====== main cfg ====== 12 | seed: -1 13 | gpus: 1 14 | lr: 1e-4 15 | wd: 0.0 16 | bs: 32 17 | sim_device: 0 18 | rl_device: 0 19 | graphics_device_id: 0 20 | eval_interval: 5 # every N epochs 21 | data_path: ??? 22 | matched_scene_data_path: ??? 23 | 24 | # ------ logging ------ 25 | use_wandb: true 26 | wandb_project: ??? 
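# NOTE: `???` marks a mandatory Hydra value; config composition fails unless it
# is supplied at launch. A plausible invocation (flags inferred from this
# config, not verified against main/distillation/train.py):
#   python main/distillation/train.py task=InsertSinglePCD distillation_student_arch=pointnet exp_root_dir=<dir> data_path=<teacher-rollouts> matched_scene_data_path=<matched-scenes> wandb_project=<project>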
27 | wandb_run_name: ${run_name} 28 | 29 | # ------ module ------ 30 | module: 31 | _target_: transic.distillation.module.DistillationModule 32 | # ====== policies ====== 33 | prop_obs_keys: 34 | - q 35 | - cos_q 36 | - sin_q 37 | - eef_pos 38 | - eef_quat 39 | - gripper_width 40 | pcd_sample_points: ${task.env.pcdN} 41 | # ====== learning ====== 42 | lr: ${lr} 43 | optimizer: "adam" 44 | weight_decay: ${wd} 45 | # ====== env creation ====== 46 | rlg_task_cfg: ${task} 47 | num_envs: ${num_envs} 48 | display: ${display} 49 | # ====== training data augmentation ====== 50 | enable_pcd_augmentation: true 51 | pcd_aug_apply_prob: 0.4 52 | pcd_aug_random_trans_high: [0.04, 0.04, 0.04] 53 | pcd_aug_random_trans_low: [-0.04, -0.04, -0.04] 54 | pcd_aug_jitter_ratio: 0.1 55 | pcd_aug_jitter_sigma: 0.01 56 | pcd_aug_jitter_low: -0.015 57 | pcd_aug_jitter_high: 0.015 58 | enable_prop_augmentation: true 59 | prop_aug_scale_sigma: 0.1 60 | prop_aug_scale_low: -0.3 61 | prop_aug_scale_high: 0.3 62 | # ====== eval ====== 63 | n_eval_episodes: 1000 64 | # ====== pcd regularization ====== 65 | enable_pcd_matched_scenes_regularization: true 66 | pcd_matched_scenes_reg_weight: 1e-3 67 | # ====== device ====== 68 | sim_device: ${sim_device} 69 | rl_device: ${rl_device} 70 | graphics_device_id: ${graphics_device_id} 71 | 72 | data_module: 73 | _target_: transic.distillation.data.DistillationDataModule 74 | data_path: ${data_path} 75 | matched_scene_data_path: ${matched_scene_data_path} 76 | ctx_len: -1 # -1 means not using sequence policy at all 77 | skip_first_n_steps: 0 78 | sampled_pcd_points: ${task.env.pcdN} 79 | refresh_pcd_sampling_idxs_interval: 0.1 80 | real_pcd_x_limits: [0.2, 0.7] 81 | real_pcd_y_limits: [-0.3, 0.3] 82 | real_pcd_z_min: 0.01 83 | batch_size: ${bs} 84 | dataloader_num_workers: 64 85 | seed: ${seed} 86 | 87 | trainer: 88 | _target_: pytorch_lightning.Trainer 89 | accelerator: "gpu" 90 | devices: ${gpus} 91 | benchmark: true # enables cudnn.benchmark 92 | accumulate_grad_batches: 1 93 | num_sanity_val_steps: 0 94 | max_epochs: 999999999 95 | val_check_interval: null 96 | check_val_every_n_epoch: ${eval_interval} 97 | gradient_clip_val: 1.0 98 | checkpoint: # this sub-dict will be popped to send to ModelCheckpoint as args 99 | - filename: "s{step}-val_sr{val/success_rate:.5f}" 100 | save_top_k: 5 101 | save_last: true 102 | monitor: "val/success_rate" 103 | mode: max 104 | auto_insert_metric_name: false # prevent creating subfolder caused by the slash 105 | 106 | # ------------- Testing --------------- 107 | test: 108 | ckpt_path: null 109 | 110 | hydra: 111 | job: 112 | chdir: true 113 | run: 114 | dir: "." 
115 | output_subdir: null -------------------------------------------------------------------------------- /main/cfg/distillation_student_arch/pointnet.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | arch_name: pointnet 4 | module: 5 | student_policy: 6 | _target_: transic.distillation.policy.PointNetPolicy 7 | point_channels: 3 8 | subtract_point_mean: false 9 | add_ee_embd: true 10 | ee_embd_dim: 128 11 | pointnet_output_dim: 256 12 | pointnet_hidden_dim: 256 13 | pointnet_hidden_depth: 2 14 | pointnet_activation: "gelu" 15 | prop_input_dim: 29 16 | feature_fusion_hidden_depth: 1 17 | feature_fusion_hidden_dim: 512 18 | feature_fusion_output_dim: 512 19 | feature_fusion_activation: "relu" 20 | feature_fusion_add_input_activation: false 21 | feature_fusion_add_output_activation: false 22 | action_dim: 8 23 | action_net_gmm_n_modes: 5 24 | action_net_hidden_dim: 128 25 | action_net_hidden_depth: 3 26 | action_net_activation: "relu" 27 | deterministic_inference: true 28 | gmm_low_noise_eval: true 29 | -------------------------------------------------------------------------------- /main/cfg/distillation_student_arch/rnn_pointnet.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | arch_name: rnn_pointnet 4 | rnn_horizon: 5 5 | module: 6 | student_policy: 7 | _target_: transic.distillation.policy.RNNPointNetPolicy 8 | point_channels: 3 9 | subtract_point_mean: false 10 | add_ee_embd: true 11 | ee_embd_dim: 128 12 | pointnet_output_dim: 256 13 | pointnet_hidden_dim: 256 14 | pointnet_hidden_depth: 2 15 | pointnet_activation: "gelu" 16 | prop_input_dim: 29 17 | feature_fusion_hidden_depth: 1 18 | feature_fusion_hidden_dim: 512 19 | feature_fusion_output_dim: 512 20 | feature_fusion_activation: "relu" 21 | feature_fusion_add_input_activation: false 22 | feature_fusion_add_output_activation: false 23 | rnn_type: "lstm" 24 | rnn_n_layers: 2 25 | rnn_hidden_dim: 512 26 | ctx_len: ${rnn_horizon} 27 | action_dim: 8 28 | action_net_gmm_n_modes: 5 29 | action_net_hidden_dim: 128 30 | action_net_hidden_depth: 3 31 | action_net_activation: "relu" 32 | deterministic_inference: true 33 | gmm_low_noise_eval: true 34 | 35 | data_module: 36 | ctx_len: ${rnn_horizon} -------------------------------------------------------------------------------- /main/cfg/residual_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ # all below configs will override this conf.yaml 3 | - residual_policy_arch: ??? 4 | - residual_policy_task: ??? 5 | 6 | run_name: "residual_policy_${arch_name}_lr${lr}_wd${wd}_b${bs}" 7 | exp_root_dir: ??? 8 | 9 | arch_name: ??? # filled by residual_policy_arch 10 | 11 | # ====== main cfg ====== 12 | seed: -1 13 | gpus: 1 14 | lr: 1e-4 15 | wd: 0.0 16 | bs: 32 17 | vbs: 32 18 | data_dir: ??? 19 | eval_interval: 1 20 | 21 | # ------ logging ------ 22 | use_wandb: true 23 | wandb_project: ??? 
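# As in distillation_config.yaml, every `???` field here (residual_policy_arch,
# residual_policy_task, exp_root_dir, data_dir, wandb_project) must be supplied
# at launch, e.g. (hypothetical invocation):
#   python main/residual/train.py residual_policy_arch=pointnet residual_policy_task=default exp_root_dir=<dir> data_dir=<correction-data> wandb_project=<project>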
24 | wandb_run_name: ${run_name} 25 | 26 | # ------ module ------ 27 | module: 28 | _target_: transic.residual.module.ResidualPolicyModule 29 | include_robot_gripper_action_input: true 30 | learn_gripper_action: true 31 | # ====== learning ====== 32 | lr: ${lr} 33 | use_cosine_lr: true 34 | lr_warmup_steps: 1000 35 | lr_cosine_steps: 100000 36 | lr_cosine_min: 1e-6 37 | optimizer: "adam" 38 | weight_decay: ${wd} 39 | intervention_pred_loss_weight: 1.0 40 | # ====== pcd sampling ====== 41 | pcd_downsample_N: null 42 | 43 | data_module: 44 | _target_: transic.residual.data.ResidualDataModule 45 | data_dir: ${data_dir} 46 | include_grasp_action: true 47 | gripper_close_width: 0.025 48 | gripper_open_width: 0.08 49 | variable_len_pcd_handle_strategy: "truncate" 50 | seed: ${seed} 51 | batch_size: ${bs} 52 | val_batch_size: ${vbs} 53 | train_portion: 0.9 54 | dataloader_num_workers: 8 55 | 56 | trainer: 57 | _target_: pytorch_lightning.Trainer 58 | accelerator: "gpu" 59 | devices: ${gpus} 60 | benchmark: true # enables cudnn.benchmark 61 | accumulate_grad_batches: 1 62 | num_sanity_val_steps: 0 63 | max_epochs: 999999999 64 | val_check_interval: null 65 | check_val_every_n_epoch: ${eval_interval} 66 | gradient_clip_val: 1.0 67 | checkpoint: # this sub-dict will be popped to send to ModelCheckpoint as args 68 | - filename: "epoch{epoch}-val_loss{val/loss:.5f}" 69 | save_top_k: 5 70 | save_last: true 71 | monitor: "val/loss" 72 | mode: min 73 | auto_insert_metric_name: false # prevent creating subfolder caused by the slash 74 | - filename: "epoch{epoch}-val_intervention_acc{val/intervention_acc:.5f}" 75 | save_top_k: 3 76 | save_last: false 77 | monitor: "val/intervention_acc" 78 | mode: max 79 | auto_insert_metric_name: false # prevent creating subfolder caused by the slash 80 | 81 | # ---------------------------- 82 | 83 | hydra: 84 | job: 85 | chdir: true 86 | run: 87 | dir: "." 
88 | output_subdir: null -------------------------------------------------------------------------------- /main/cfg/residual_policy_arch/perceiver.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | bs: 8 4 | 5 | arch_name: perceiver_mlp 6 | module: 7 | residual_policy: 8 | _target_: transic.residual.policy.PerceiverResidualPolicy 9 | point_channels: 3 10 | subtract_point_mean: false 11 | add_ee_embd: true 12 | ee_embd_dim: 128 13 | set_xf_hidden_dim: 256 14 | set_xf_num_heads: 8 15 | set_xf_num_queries: 8 16 | set_xf_pool_type: concat 17 | set_xf_layer_norm: false 18 | prop_input_dim: 29 19 | robot_policy_output_dim: 7 20 | include_robot_policy_gripper_action_input: true 21 | robot_policy_gripper_action_embd_dim: 64 22 | feature_fusion_hidden_depth: 1 23 | feature_fusion_hidden_dim: 512 24 | feature_fusion_output_dim: 512 25 | feature_fusion_activation: "relu" 26 | feature_fusion_add_input_activation: false 27 | feature_fusion_add_output_activation: false 28 | action_dim: 8 29 | action_net_gmm_n_modes: 5 30 | action_net_hidden_dim: 128 31 | action_net_hidden_depth: 3 32 | action_net_activation: "relu" 33 | intervention_head_hidden_dim: 128 34 | intervention_head_hidden_depth: 3 35 | intervention_head_activation: "relu" 36 | deterministic_inference: true 37 | gmm_low_noise_eval: true 38 | update_intervention_head_only: false 39 | ckpt_path_if_update_intervention_head_only: null 40 | 41 | data_module: 42 | ctx_len: 20 # this is not required by the mlp policy, but just for consistency 43 | variable_len_pcd_handle_strategy: "pad" # for set transformer, we can use full PCD -------------------------------------------------------------------------------- /main/cfg/residual_policy_arch/pointnet.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | bs: 8 4 | 5 | arch_name: pointnet_mlp 6 | module: 7 | residual_policy: 8 | _target_: transic.residual.policy.PointNetResidualPolicy 9 | point_channels: 3 10 | subtract_point_mean: false 11 | add_ee_embd: true 12 | ee_embd_dim: 128 13 | pointnet_output_dim: 256 14 | pointnet_hidden_dim: 256 15 | pointnet_hidden_depth: 2 16 | pointnet_activation: "gelu" 17 | prop_input_dim: 29 18 | robot_policy_output_dim: 7 19 | include_robot_policy_gripper_action_input: true 20 | robot_policy_gripper_action_embd_dim: 64 21 | feature_fusion_hidden_depth: 1 22 | feature_fusion_hidden_dim: 512 23 | feature_fusion_output_dim: 512 24 | feature_fusion_activation: "relu" 25 | feature_fusion_add_input_activation: false 26 | feature_fusion_add_output_activation: false 27 | action_dim: 8 28 | action_net_gmm_n_modes: 5 29 | action_net_hidden_dim: 128 30 | action_net_hidden_depth: 3 31 | action_net_activation: "relu" 32 | intervention_head_hidden_dim: 128 33 | intervention_head_hidden_depth: 3 34 | intervention_head_activation: "relu" 35 | deterministic_inference: true 36 | gmm_low_noise_eval: true 37 | update_intervention_head_only: false 38 | ckpt_path_if_update_intervention_head_only: null 39 | 40 | data_module: 41 | ctx_len: 20 # this is not required by the mlp policy, but just for consistency -------------------------------------------------------------------------------- /main/cfg/residual_policy_task/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | -------------------------------------------------------------------------------- /main/cfg/residual_policy_task/insert.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | module: 4 | # ====== pcd sampling ====== 5 | pcd_downsample_N: 1000 6 | include_robot_gripper_action_input: false 7 | learn_gripper_action: false 8 | residual_policy: 9 | include_robot_policy_gripper_action_input: false 10 | action_dim: 7 11 | 12 | data_module: 13 | include_grasp_action: false 14 | -------------------------------------------------------------------------------- /main/cfg/rl_train/InsertFullPPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 130 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:InsertFull,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 4096 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 -------------------------------------------------------------------------------- /main/cfg/rl_train/InsertSinglePPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 76 30 | proprioception: 31 | _target_: 
transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:InsertSingle,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 16384 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 -------------------------------------------------------------------------------- /main/cfg/rl_train/LiftLeanedLegPPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 136 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:LiftLeanedLeg,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | 
save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 4096 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 -------------------------------------------------------------------------------- /main/cfg/rl_train/ReachAndGraspFullPPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 58 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:ReachAndGraspFull,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 4096 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 -------------------------------------------------------------------------------- /main/cfg/rl_train/ReachAndGraspSinglePPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 58 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | 
output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:ReachAndGraspSingle,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 100 77 | save_frequency: 50 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 16384 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 90 | -------------------------------------------------------------------------------- /main/cfg/rl_train/ScrewFullPPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 74 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:ScrewFull,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | 
entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 16384 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 90 | -------------------------------------------------------------------------------- /main/cfg/rl_train/ScrewSinglePPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 74 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 29 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:ScrewSingle,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 16384 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 90 | -------------------------------------------------------------------------------- /main/cfg/rl_train/StabilizePPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | algo: 4 | name: ppo 5 | 6 | model: 7 | name: my_continuous_a2c_logstd 8 | 9 | network: 10 | name: dict_obs_actor_critic 11 | separate: False 12 | 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | mu_init: 18 | name: default 19 | sigma_init: 20 | name: const_initializer 21 | val: 0 22 | fixed_sigma: True 23 | 24 | dict_feature_encoder: 25 | _target_: transic.nn.features.SimpleFeatureFusion 26 | extractors: 27 | privileged: 28 | _target_: transic.nn.features.Identity 29 | input_dim: 145 30 | proprioception: 31 | _target_: transic.nn.features.Identity 32 | input_dim: 36 33 | hidden_depth: 1 34 | hidden_dim: 256 35 | output_dim: 256 36 | activation: "relu" 37 | add_input_activation: false 38 | add_output_activation: 
false 39 | 40 | mlp: 41 | units: [256, 128, 64] 42 | activation: elu 43 | d2rl: False 44 | 45 | initializer: 46 | name: default 47 | regularizer: 48 | name: None 49 | 50 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 51 | load_path: ${...checkpoint} # path to the checkpoint to load 52 | 53 | config: 54 | name: ${resolve_default:Stabilize,${....experiment}} 55 | full_experiment_name: ${.name} 56 | env_name: rlgpu 57 | multi_gpu: ${....multi_gpu} 58 | ppo: True 59 | mixed_precision: False 60 | normalize_input: True 61 | normalize_input_excluded_keys: [] 62 | normalize_value: True 63 | value_bootstrap: True 64 | num_actors: ${....task.env.numEnvs} 65 | reward_shaper: 66 | scale_value: 1.0 67 | normalize_advantage: True 68 | gamma: 0.99 69 | tau: 0.95 70 | learning_rate: 5e-4 71 | lr_schedule: adaptive 72 | schedule_type: standard 73 | kl_threshold: 0.008 74 | score_to_win: 10000 75 | max_epochs: ${resolve_default:10000,${....max_iterations}} 76 | save_best_after: 200 77 | save_frequency: 100 78 | print_stats: True 79 | grad_norm: 1.0 80 | entropy_coef: 0.0 81 | truncate_grads: True 82 | e_clip: 0.2 83 | horizon_length: 32 84 | minibatch_size: 4096 85 | mini_epochs: 5 86 | critic_coef: 4 87 | clip_value: True 88 | seq_len: 4 89 | bounds_loss_coef: 0.0001 90 | -------------------------------------------------------------------------------- /main/cfg/task/InsertFullPCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: InsertFullPCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:2048,${...num_envs}} 10 | episodeLength: 100 11 | 12 | furniture: "square_table" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.25 21 | 22 | distanceReward: 0 23 | successReward: 1 24 | 25 | selectedLegIdx: ??? 
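# selectedLegIdx is mandatory and presumably selects which of the four table
# legs the insertion targets; supply it at launch, e.g.
# task.env.selectedLegIdx=3 (example value; the valid index range is not
# verified here).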
26 | 27 | aggregateMode: 3 28 | 29 | actionScale: 1.0 30 | useQuatRot: false 31 | 32 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 33 | 34 | pcAugmentation: 35 | enabled: true 36 | applyP: 0.4 37 | randomTransLow: [-0.04, -0.04, -0.04] 38 | randomTransHigh: [0.04, 0.04, 0.04] 39 | 40 | jitterRatio: 0.1 41 | jitterSigma: 0.01 42 | jitterHigh: 0.015 43 | jitterLow: -0.015 44 | 45 | propObsDim: 29 46 | obsKeys: 47 | - q 48 | - cos_q 49 | - sin_q 50 | - eef_pos 51 | - eef_quat 52 | - gripper_width 53 | 54 | privilegedObsDim: 130 55 | privilegedObsKeys: 56 | - square_table_top_pos 57 | - square_table_top_rot 58 | - square_table_top_vel 59 | - square_table_leg1_pos 60 | - square_table_leg1_rot 61 | - square_table_leg1_vel 62 | - square_table_leg2_pos 63 | - square_table_leg2_rot 64 | - square_table_leg2_vel 65 | - square_table_leg3_pos 66 | - square_table_leg3_rot 67 | - square_table_leg3_vel 68 | - square_table_leg4_pos 69 | - square_table_leg4_rot 70 | - square_table_leg4_vel 71 | - obstacle_front_pos 72 | - obstacle_left_pos 73 | - obstacle_right_pos 74 | - eef_vel 75 | - ftip_center_pos 76 | - eef_lf_pos 77 | - eef_rf_pos 78 | - q 79 | - cos_q 80 | - sin_q 81 | - dq 82 | - q_gripper 83 | - target_xyz 84 | 85 | # set to True if you use camera sensors in the environment 86 | enableCameraSensors: False 87 | 88 | sim: 89 | dt: 0.01667 # 1/60 90 | substeps: 2 91 | up_axis: "z" 92 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 93 | gravity: [0.0, 0.0, -9.81] 94 | physx: 95 | num_threads: ${....num_threads} 96 | solver_type: ${....solver_type} 97 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 98 | num_position_iterations: 8 99 | num_velocity_iterations: 1 100 | contact_offset: 0.005 101 | rest_offset: 0.0 102 | bounce_threshold_velocity: 0.2 103 | max_depenetration_velocity: 1000.0 104 | default_buffer_size_multiplier: 5.0 105 | max_gpu_contact_pairs: 1048576 # 1024*1024 106 | num_subscenes: ${....num_subscenes} 107 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 108 | 109 | task: 110 | randomize: False 111 | -------------------------------------------------------------------------------- /main/cfg/task/InsertSingle.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: InsertSingle 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 
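# `resolve_default`, `eq`, `contains`, and `if` used in this file are custom
# OmegaConf resolvers registered by the training entry point (assumed to follow
# the IsaacGymEnvs convention), roughly:
#   OmegaConf.register_new_resolver("resolve_default", lambda default, arg: default if arg == "" else arg)
#   OmegaConf.register_new_resolver("eq", lambda x, y: x.lower() == y.lower())
#   OmegaConf.register_new_resolver("contains", lambda x, y: x.lower() in y.lower())
# Under these semantics, numEnvs below resolves to 8192 unless a top-level
# num_envs is given.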
8 | env: 9 | numEnvs: ${resolve_default:8192,${...num_envs}} 10 | episodeLength: 100 11 | 12 | furniture: "table_with_one_leg" 13 | 14 | clipObservations: 5.0 15 | clipActions: 1.0 16 | 17 | frankaDofNoise: 0.25 18 | 19 | distanceReward: 0.1 20 | successReward: 100.0 21 | rotationNoise: 60 22 | 23 | aggregateMode: 3 24 | 25 | actionScale: 1.0 26 | useQuatRot: false 27 | 28 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 29 | 30 | # for distillation 31 | propDumpInfo: 32 | q: 7 33 | eef_pos: 3 34 | eef_quat: 4 35 | gripper_width: 1 36 | 37 | propObsDim: 29 38 | obsKeys: 39 | - q 40 | - cos_q 41 | - sin_q 42 | - eef_pos 43 | - eef_quat 44 | - gripper_width 45 | 46 | privilegedObsDim: 76 47 | privilegedObsKeys: 48 | - square_table_top_pos 49 | - square_table_top_rot 50 | - square_table_top_vel 51 | - square_table_leg4_pos 52 | - square_table_leg4_rot 53 | - square_table_leg4_vel 54 | - eef_vel 55 | - ftip_center_pos 56 | - obstacle_front_pos 57 | - obstacle_left_pos 58 | - obstacle_right_pos 59 | - front_wall_cf 60 | - left_wall_cf 61 | - right_wall_cf 62 | - square_table_top_cf 63 | - square_table_leg4_cf 64 | - eef_lf_pos 65 | - eef_rf_pos 66 | - dq 67 | - target_xy 68 | 69 | # set to True if you use camera sensors in the environment 70 | enableCameraSensors: False 71 | 72 | sim: 73 | dt: 0.01667 # 1/60 74 | substeps: 2 75 | up_axis: "z" 76 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 77 | gravity: [0.0, 0.0, -9.81] 78 | physx: 79 | num_threads: ${....num_threads} 80 | solver_type: ${....solver_type} 81 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 82 | num_position_iterations: 8 83 | num_velocity_iterations: 1 84 | contact_offset: 0.005 85 | rest_offset: 0.0 86 | bounce_threshold_velocity: 0.2 87 | max_depenetration_velocity: 1000.0 88 | default_buffer_size_multiplier: 5.0 89 | max_gpu_contact_pairs: 1048576 # 1024*1024 90 | num_subscenes: ${....num_subscenes} 91 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
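# The randomization_params below follow the IsaacGymEnvs domain-randomization
# schema (semantics assumed from that codebase): `operation` applies the sample
# additively or as a scale factor, `distribution` draws from uniform/loguniform
# over `range`, `setup_only: True` randomizes once at environment creation
# instead of on every reset, `num_buckets` quantizes samples to bound the
# number of distinct physics materials, and a linear `schedule` ramps the
# randomization in over `schedule_steps` simulation steps.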
92 | 93 | task: 94 | randomize: True 95 | randomization_params: 96 | frequency: 1 97 | sim_params: 98 | gravity: 99 | range: [ 0, 0.4 ] 100 | operation: "additive" 101 | distribution: "uniform" 102 | schedule: "linear" 103 | schedule_steps: 100000000 104 | actor_params: 105 | franka: 106 | color: True 107 | rigid_body_properties: 108 | mass: 109 | range: [0.5, 1.5] 110 | operation: "scaling" 111 | distribution: "uniform" 112 | setup_only: True 113 | schedule: "linear" 114 | schedule_steps: 100000000 115 | rigid_shape_properties: 116 | friction: 117 | num_buckets: 250 118 | range: [ 0.7, 1.3 ] 119 | operation: "scaling" 120 | distribution: "uniform" 121 | schedule: "linear" 122 | schedule_steps: 100000000 123 | dof_properties: 124 | lower: 125 | range: [ 1.0, 1.010050167084168 ] 126 | operation: "scaling" 127 | distribution: "loguniform" 128 | schedule: "linear" 129 | schedule_steps: 100000000 130 | upper: 131 | range: [ 1.0, 1.010050167084168 ] 132 | operation: "scaling" 133 | distribution: "loguniform" 134 | schedule: "linear" 135 | schedule_steps: 100000000 136 | stiffness: 137 | range: [ 1.0, 1.010050167084168 ] 138 | operation: "scaling" 139 | distribution: "loguniform" 140 | schedule: "linear" 141 | schedule_steps: 100000000 142 | damping: 143 | range: [ 1.0, 1.010050167084168 ] 144 | operation: "scaling" 145 | distribution: "loguniform" 146 | schedule: "linear" 147 | schedule_steps: 100000000 148 | table: 149 | color: True 150 | rigid_shape_properties: 151 | friction: 152 | num_buckets: 250 153 | range: [ 0.5, 1.5 ] 154 | operation: "scaling" 155 | distribution: "uniform" 156 | schedule: "linear" 157 | schedule_steps: 100000000 158 | square_table_leg4: 159 | color: True 160 | rigid_body_properties: 161 | mass: 162 | range: [ 0.5, 1.5 ] 163 | operation: "scaling" 164 | distribution: "uniform" 165 | setup_only: True 166 | schedule: "linear" 167 | schedule_steps: 100000000 168 | rigid_shape_properties: 169 | friction: 170 | num_buckets: 250 171 | range: [ 0.5, 1.5 ] 172 | operation: "scaling" 173 | distribution: "uniform" 174 | schedule: "linear" 175 | schedule_steps: 100000000 176 | rolling_friction: 177 | num_buckets: 250 178 | range: [ 0.5, 1.5 ] 179 | operation: "scaling" 180 | distribution: "uniform" 181 | schedule: "linear" 182 | schedule_steps: 100000000 183 | torsion_friction: 184 | num_buckets: 250 185 | range: [ 0.5, 1.5 ] 186 | operation: "scaling" 187 | distribution: "uniform" 188 | schedule: "linear" 189 | schedule_steps: 100000000 190 | restitution: 191 | range: [0.0, 1.0] 192 | operation: "additive" 193 | distribution: "uniform" 194 | schedule: "linear" 195 | schedule_steps: 100000000 196 | compliance: 197 | range: [0.0, 1.0] 198 | operation: "additive" 199 | distribution: "uniform" 200 | schedule: "linear" 201 | schedule_steps: 100000000 202 | square_table_top: 203 | color: True 204 | rigid_body_properties: 205 | mass: 206 | range: [ 0.5, 1.5 ] 207 | operation: "scaling" 208 | distribution: "uniform" 209 | setup_only: True 210 | schedule: "linear" 211 | schedule_steps: 100000000 212 | rigid_shape_properties: 213 | friction: 214 | num_buckets: 250 215 | range: [ 0.5, 1.5 ] 216 | operation: "scaling" 217 | distribution: "uniform" 218 | schedule: "linear" 219 | schedule_steps: 100000000 220 | rolling_friction: 221 | num_buckets: 250 222 | range: [ 0.5, 1.5 ] 223 | operation: "scaling" 224 | distribution: "uniform" 225 | schedule: "linear" 226 | schedule_steps: 100000000 227 | torsion_friction: 228 | num_buckets: 250 229 | range: [ 0.5, 1.5 ] 230 | operation: "scaling" 231 | 
distribution: "uniform" 232 | schedule: "linear" 233 | schedule_steps: 100000000 234 | restitution: 235 | range: [0.0, 1.0] 236 | operation: "additive" 237 | distribution: "uniform" 238 | schedule: "linear" 239 | schedule_steps: 100000000 240 | compliance: 241 | range: [0.0, 1.0] 242 | operation: "additive" 243 | distribution: "uniform" 244 | schedule: "linear" 245 | schedule_steps: 100000000 -------------------------------------------------------------------------------- /main/cfg/task/InsertSinglePCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: InsertSinglePCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:2048,${...num_envs}} 10 | episodeLength: 100 11 | 12 | furniture: "table_with_one_leg" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.25 21 | 22 | distanceReward: 0.1 23 | successReward: 100.0 24 | 25 | aggregateMode: 3 26 | 27 | actionScale: 1.0 28 | useQuatRot: false 29 | 30 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 31 | 32 | pcAugmentation: 33 | enabled: true 34 | applyP: 0.4 35 | randomTransLow: [-0.04, -0.04, -0.04] 36 | randomTransHigh: [0.04, 0.04, 0.04] 37 | 38 | jitterRatio: 0.1 39 | jitterSigma: 0.01 40 | jitterHigh: 0.015 41 | jitterLow: -0.015 42 | 43 | propObsDim: 29 44 | obsKeys: 45 | - q 46 | - cos_q 47 | - sin_q 48 | - eef_pos 49 | - eef_quat 50 | - gripper_width 51 | 52 | privilegedObsDim: 76 53 | privilegedObsKeys: 54 | - square_table_top_pos 55 | - square_table_top_rot 56 | - square_table_top_vel 57 | - square_table_leg4_pos 58 | - square_table_leg4_rot 59 | - square_table_leg4_vel 60 | - eef_vel 61 | - ftip_center_pos 62 | - obstacle_front_pos 63 | - obstacle_left_pos 64 | - obstacle_right_pos 65 | - front_wall_cf 66 | - left_wall_cf 67 | - right_wall_cf 68 | - square_table_top_cf 69 | - square_table_leg4_cf 70 | - eef_lf_pos 71 | - eef_rf_pos 72 | - dq 73 | - target_xy 74 | 75 | # set to True if you use camera sensors in the environment 76 | enableCameraSensors: False 77 | 78 | sim: 79 | dt: 0.01667 # 1/60 80 | substeps: 2 81 | up_axis: "z" 82 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 83 | gravity: [0.0, 0.0, -9.81] 84 | physx: 85 | num_threads: ${....num_threads} 86 | solver_type: ${....solver_type} 87 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 88 | num_position_iterations: 8 89 | num_velocity_iterations: 1 90 | contact_offset: 0.005 91 | rest_offset: 0.0 92 | bounce_threshold_velocity: 0.2 93 | max_depenetration_velocity: 1000.0 94 | default_buffer_size_multiplier: 5.0 95 | max_gpu_contact_pairs: 1048576 # 1024*1024 96 | num_subscenes: ${....num_subscenes} 97 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 98 | 99 | task: 100 | randomize: False 101 | -------------------------------------------------------------------------------- /main/cfg/task/LiftLeanedLegPCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: LiftLeanedLegPCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 
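# PCD task variants expose point-cloud observations on top of the state-based
# tasks: pcdN (768) is the number of points sampled per cloud and is consumed
# by the distillation pipeline via ${task.env.pcdN} in
# main/cfg/distillation_config.yaml; pcdMaskRatio: null presumably disables
# random point masking.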
8 | env: 9 | numEnvs: ${resolve_default:768,${...num_envs}} 10 | episodeLength: 100 11 | 12 | furniture: "square_table" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.15 21 | 22 | successWeight: 1 23 | targetLiftHeight: 0.17 24 | successEEFTiltThreshold: 5 25 | 26 | aggregateMode: 3 27 | 28 | actionScale: 1.0 29 | useQuatRot: false 30 | 31 | frankaDefaultDofPos: [0.12162008114028396, -0.19826458111314524, -0.01990020486871322, -2.4732269941140346, -0.01307073642274261, 2.30396583422025, 0.8480939705504309, 0.035, 0.035] 32 | 33 | pcAugmentation: 34 | enabled: true 35 | applyP: 0.4 36 | randomTransLow: [-0.04, -0.04, -0.04] 37 | randomTransHigh: [0.04, 0.04, 0.04] 38 | 39 | jitterRatio: 0.1 40 | jitterSigma: 0.01 41 | jitterHigh: 0.015 42 | jitterLow: -0.015 43 | 44 | propObsDim: 29 45 | obsKeys: 46 | - q 47 | - cos_q 48 | - sin_q 49 | - eef_pos 50 | - eef_quat 51 | - gripper_width 52 | 53 | privilegedObsDim: 136 54 | privilegedObsKeys: 55 | - square_table_top_pos 56 | - square_table_top_rot 57 | - square_table_top_vel 58 | - square_table_leg1_pos 59 | - square_table_leg1_rot 60 | - square_table_leg1_vel 61 | - square_table_leg2_pos 62 | - square_table_leg2_rot 63 | - square_table_leg2_vel 64 | - square_table_leg3_pos 65 | - square_table_leg3_rot 66 | - square_table_leg3_vel 67 | - square_table_leg4_pos 68 | - square_table_leg4_rot 69 | - square_table_leg4_vel 70 | - obstacle_front_pos 71 | - obstacle_left_pos 72 | - obstacle_right_pos 73 | - eef_vel 74 | - eef_lf_pos 75 | - eef_rf_pos 76 | - q 77 | - cos_q 78 | - sin_q 79 | - dq 80 | - q_gripper 81 | - front_wall_cf 82 | - left_wall_cf 83 | - right_wall_cf 84 | - square_table_leg4_cf 85 | 86 | # set to True if you use camera sensors in the environment 87 | enableCameraSensors: False 88 | 89 | sim: 90 | dt: 0.01667 # 1/60 91 | substeps: 2 92 | up_axis: "z" 93 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 94 | gravity: [0.0, 0.0, -9.81] 95 | physx: 96 | num_threads: ${....num_threads} 97 | solver_type: ${....solver_type} 98 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 99 | num_position_iterations: 8 100 | num_velocity_iterations: 1 101 | contact_offset: 0.005 102 | rest_offset: 0.0 103 | bounce_threshold_velocity: 0.2 104 | max_depenetration_velocity: 1000.0 105 | default_buffer_size_multiplier: 5.0 106 | max_gpu_contact_pairs: 1048576 # 1024*1024 107 | num_subscenes: ${....num_subscenes} 108 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 109 | 110 | task: 111 | randomize: False 112 | -------------------------------------------------------------------------------- /main/cfg/task/ReachAndGraspFullPCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ReachAndGraspFullPCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:768,${...num_envs}} 10 | episodeLength: 800 11 | 12 | furniture: "square_table_patch_fix" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.25 21 | 22 | successWeight: 1 23 | targetLiftHeight: 0.05 24 | 25 | selectedLegIdx: ??? 
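# Mandatory, as in InsertFullPCD: presumably the index of the leg to reach for
# and grasp; pass task.env.selectedLegIdx=<idx> at launch.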
26 | 27 | aggregateMode: 3 28 | 29 | actionScale: 1.0 30 | useQuatRot: false 31 | 32 | frankaDefaultDofPos: [0.12162008114028396, -0.19826458111314524, -0.01990020486871322, -2.4732269941140346, -0.01307073642274261, 2.30396583422025, 0.8480939705504309, 0.035, 0.035] 33 | 34 | pcAugmentation: 35 | enabled: true 36 | applyP: 0.4 37 | randomTransLow: [-0.04, -0.04, -0.04] 38 | randomTransHigh: [0.04, 0.04, 0.04] 39 | 40 | jitterRatio: 0.1 41 | jitterSigma: 0.01 42 | jitterHigh: 0.015 43 | jitterLow: -0.015 44 | 45 | propObsDim: 29 46 | obsKeys: 47 | - q 48 | - cos_q 49 | - sin_q 50 | - eef_pos 51 | - eef_quat 52 | - gripper_width 53 | 54 | privilegedObsDim: 136 55 | privilegedObsKeys: 56 | - square_table_top_pos 57 | - square_table_top_rot 58 | - square_table_top_vel 59 | - square_table_leg1_pos 60 | - square_table_leg1_rot 61 | - square_table_leg1_vel 62 | - square_table_leg2_pos 63 | - square_table_leg2_rot 64 | - square_table_leg2_vel 65 | - square_table_leg3_pos 66 | - square_table_leg3_rot 67 | - square_table_leg3_vel 68 | - leg_pos 69 | - leg_rot 70 | - leg_vel 71 | - obstacle_front_pos 72 | - obstacle_left_pos 73 | - obstacle_right_pos 74 | - eef_vel 75 | - eef_lf_pos 76 | - eef_rf_pos 77 | - q 78 | - cos_q 79 | - sin_q 80 | - dq 81 | - q_gripper 82 | - front_wall_cf 83 | - left_wall_cf 84 | - right_wall_cf 85 | - leg_cf 86 | 87 | # set to True if you use camera sensors in the environment 88 | enableCameraSensors: False 89 | 90 | sim: 91 | dt: 0.01667 # 1/60 92 | substeps: 2 93 | up_axis: "z" 94 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 95 | gravity: [0.0, 0.0, -9.81] 96 | physx: 97 | num_threads: ${....num_threads} 98 | solver_type: ${....solver_type} 99 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 100 | num_position_iterations: 8 101 | num_velocity_iterations: 1 102 | contact_offset: 0.005 103 | rest_offset: 0.0 104 | bounce_threshold_velocity: 0.2 105 | max_depenetration_velocity: 1000.0 106 | default_buffer_size_multiplier: 5.0 107 | max_gpu_contact_pairs: 1048576 # 1024*1024 108 | num_subscenes: ${....num_subscenes} 109 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 110 | 111 | task: 112 | randomize: False 113 | -------------------------------------------------------------------------------- /main/cfg/task/ReachAndGraspSingle.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ReachAndGraspSingle 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 
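# Per the find_rl_train_config resolver in main/cfg/config.yaml, this task is
# presumably paired by name with main/cfg/rl_train/ReachAndGraspSinglePPO.yaml
# for PPO training.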
8 | env: 9 | numEnvs: ${resolve_default:8192,${...num_envs}} 10 | episodeLength: 50 11 | 12 | clipObservations: 5.0 13 | clipActions: 1.0 14 | 15 | furniture: just_one_leg 16 | 17 | frankaDofNoise: 0.25 18 | 19 | targetLiftHeight: 0.05 20 | distanceReward: 0.1 21 | liftReward: 1.0 22 | successReward: 200.0 23 | 24 | aggregateMode: 3 25 | 26 | actionScale: 1.0 27 | useQuatRot: false 28 | 29 | frankaDefaultDofPos: [0.12162008114028396, -0.19826458111314524, -0.01990020486871322, -2.4732269941140346, -0.01307073642274261, 2.30396583422025, 0.8480939705504309, 0.035, 0.035] 30 | 31 | # for distillation 32 | propDumpInfo: 33 | q: 7 34 | eef_pos: 3 35 | eef_quat: 4 36 | gripper_width: 1 37 | 38 | propObsDim: 29 39 | obsKeys: 40 | - q 41 | - cos_q 42 | - sin_q 43 | - eef_pos 44 | - eef_quat 45 | - gripper_width 46 | 47 | privilegedObsDim: 58 48 | privilegedObsKeys: 49 | - leg_pos 50 | - leg_rot 51 | - leg_vel 52 | - eef_vel 53 | - ftip_center_pos 54 | - obstacle_front_pos 55 | - obstacle_left_pos 56 | - obstacle_right_pos 57 | - front_wall_cf 58 | - left_wall_cf 59 | - right_wall_cf 60 | - leg_cf 61 | - eef_lf_pos 62 | - eef_rf_pos 63 | - dq 64 | 65 | # set to True if you use camera sensors in the environment 66 | enableCameraSensors: False 67 | 68 | sim: 69 | dt: 0.01667 # 1/60 70 | substeps: 2 71 | up_axis: "z" 72 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 73 | gravity: [0.0, 0.0, -9.81] 74 | physx: 75 | num_threads: ${....num_threads} 76 | solver_type: ${....solver_type} 77 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 78 | num_position_iterations: 8 79 | num_velocity_iterations: 1 80 | contact_offset: 0.005 81 | rest_offset: 0.0 82 | bounce_threshold_velocity: 0.2 83 | max_depenetration_velocity: 1000.0 84 | default_buffer_size_multiplier: 5.0 85 | max_gpu_contact_pairs: 1048576 # 1024*1024 86 | num_subscenes: ${....num_subscenes} 87 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
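# NOTE (annotation): the randomization block below follows the IsaacGymEnvs schema:
# each property takes a `range`, an `operation` ("scaling" multiplies the asset default,
# "additive" offsets it), a sampling `distribution`, and a "linear" `schedule` that
# ramps the perturbation in over `schedule_steps` simulation steps. `num_buckets`
# discretizes friction samples to bound the number of distinct PhysX materials, and the
# loguniform range [1.0, 1.010050167084168] is [e^0, e^0.01], i.e. at most ~+1%
# multiplicative noise on the DOF limits, stiffness, and damping.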
88 | 89 | task: 90 | randomize: True 91 | randomization_params: 92 | frequency: 1 93 | sim_params: 94 | gravity: 95 | range: [ 0, 0.4 ] 96 | operation: "additive" 97 | distribution: "uniform" 98 | schedule: "linear" 99 | schedule_steps: 100000000 100 | actor_params: 101 | franka: 102 | color: True 103 | rigid_body_properties: 104 | mass: 105 | range: [0.5, 1.5] 106 | operation: "scaling" 107 | distribution: "uniform" 108 | setup_only: True 109 | schedule: "linear" 110 | schedule_steps: 100000000 111 | rigid_shape_properties: 112 | friction: 113 | num_buckets: 250 114 | range: [ 0.7, 1.3 ] 115 | operation: "scaling" 116 | distribution: "uniform" 117 | schedule: "linear" 118 | schedule_steps: 100000000 119 | dof_properties: 120 | lower: 121 | range: [ 1.0, 1.010050167084168 ] 122 | operation: "scaling" 123 | distribution: "loguniform" 124 | schedule: "linear" 125 | schedule_steps: 100000000 126 | upper: 127 | range: [ 1.0, 1.010050167084168 ] 128 | operation: "scaling" 129 | distribution: "loguniform" 130 | schedule: "linear" 131 | schedule_steps: 100000000 132 | stiffness: 133 | range: [ 1.0, 1.010050167084168 ] 134 | operation: "scaling" 135 | distribution: "loguniform" 136 | schedule: "linear" 137 | schedule_steps: 100000000 138 | damping: 139 | range: [ 1.0, 1.010050167084168 ] 140 | operation: "scaling" 141 | distribution: "loguniform" 142 | schedule: "linear" 143 | schedule_steps: 100000000 144 | table: 145 | color: True 146 | rigid_shape_properties: 147 | friction: 148 | num_buckets: 250 149 | range: [ 0.5, 1.5 ] 150 | operation: "scaling" 151 | distribution: "uniform" 152 | schedule: "linear" 153 | schedule_steps: 100000000 154 | leg: 155 | color: True 156 | scale: 157 | range: [0.9, 1.1] 158 | operation: "scaling" 159 | distribution: "uniform" 160 | setup_only: True 161 | schedule: "linear" 162 | schedule_steps: 100000000 163 | rigid_body_properties: 164 | mass: 165 | range: [ 0.5, 1.5 ] 166 | operation: "scaling" 167 | distribution: "uniform" 168 | setup_only: True 169 | schedule: "linear" 170 | schedule_steps: 100000000 171 | rigid_shape_properties: 172 | friction: 173 | num_buckets: 250 174 | range: [ 0.5, 1.5 ] 175 | operation: "scaling" 176 | distribution: "uniform" 177 | schedule: "linear" 178 | schedule_steps: 100000000 179 | rolling_friction: 180 | num_buckets: 250 181 | range: [ 0.5, 1.5 ] 182 | operation: "scaling" 183 | distribution: "uniform" 184 | schedule: "linear" 185 | schedule_steps: 100000000 186 | torsion_friction: 187 | num_buckets: 250 188 | range: [ 0.5, 1.5 ] 189 | operation: "scaling" 190 | distribution: "uniform" 191 | schedule: "linear" 192 | schedule_steps: 100000000 193 | restitution: 194 | range: [0.0, 1.0] 195 | operation: "additive" 196 | distribution: "uniform" 197 | schedule: "linear" 198 | schedule_steps: 100000000 199 | compliance: 200 | range: [0.0, 1.0] 201 | operation: "additive" 202 | distribution: "uniform" 203 | schedule: "linear" 204 | schedule_steps: 100000000 205 | -------------------------------------------------------------------------------- /main/cfg/task/ReachAndGraspSinglePCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ReachAndGraspSinglePCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 
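# NOTE (annotation, interpretation of key names only): in the `pcAugmentation` block
# below, `applyP` reads as the per-sample probability of applying the random translation
# drawn from [randomTransLow, randomTransHigh], and `jitterRatio`/`jitterSigma` as the
# fraction of points given Gaussian jitter and its scale, clipped to
# [jitterLow, jitterHigh]; the implementing environment code lives outside this repo
# (in transic_envs), so treat these readings as unverified.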
8 | env: 9 | numEnvs: ${resolve_default:8192,${...num_envs}} 10 | episodeLength: 100 11 | 12 | furniture: "just_one_leg" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.25 21 | 22 | targetLiftHeight: 0.05 23 | distanceReward: 0.1 24 | liftReward: 1.0 25 | successReward: 200.0 26 | 27 | aggregateMode: 3 28 | 29 | actionScale: 1.0 30 | useQuatRot: false 31 | 32 | frankaDefaultDofPos: [0.12162008114028396, -0.19826458111314524, -0.01990020486871322, -2.4732269941140346, -0.01307073642274261, 2.30396583422025, 0.8480939705504309, 0.035, 0.035] 33 | 34 | pcAugmentation: 35 | enabled: true 36 | applyP: 0.4 37 | randomTransLow: [-0.04, -0.04, -0.04] 38 | randomTransHigh: [0.04, 0.04, 0.04] 39 | 40 | jitterRatio: 0.1 41 | jitterSigma: 0.01 42 | jitterHigh: 0.015 43 | jitterLow: -0.015 44 | 45 | propObsDim: 29 46 | obsKeys: 47 | - q 48 | - cos_q 49 | - sin_q 50 | - eef_pos 51 | - eef_quat 52 | - gripper_width 53 | 54 | privilegedObsDim: 58 55 | privilegedObsKeys: 56 | - leg_pos 57 | - leg_rot 58 | - leg_vel 59 | - eef_vel 60 | - ftip_center_pos 61 | - obstacle_front_pos 62 | - obstacle_left_pos 63 | - obstacle_right_pos 64 | - front_wall_cf 65 | - left_wall_cf 66 | - right_wall_cf 67 | - leg_cf 68 | - eef_lf_pos 69 | - eef_rf_pos 70 | - dq 71 | 72 | # set to True if you use camera sensors in the environment 73 | enableCameraSensors: False 74 | 75 | sim: 76 | dt: 0.01667 # 1/60 77 | substeps: 2 78 | up_axis: "z" 79 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 80 | gravity: [0.0, 0.0, -9.81] 81 | physx: 82 | num_threads: ${....num_threads} 83 | solver_type: ${....solver_type} 84 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 85 | num_position_iterations: 8 86 | num_velocity_iterations: 1 87 | contact_offset: 0.005 88 | rest_offset: 0.0 89 | bounce_threshold_velocity: 0.2 90 | max_depenetration_velocity: 1000.0 91 | default_buffer_size_multiplier: 5.0 92 | max_gpu_contact_pairs: 1048576 # 1024*1024 93 | num_subscenes: ${....num_subscenes} 94 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 95 | 96 | task: 97 | randomize: False 98 | -------------------------------------------------------------------------------- /main/cfg/task/ScrewFull.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ScrewFull 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 
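# NOTE (annotation): per the `find_rl_train_config` resolver in transic/__init__.py,
# a task name maps to its PPO config by stripping any "PCD" suffix and appending "PPO",
# so ScrewFull and ScrewFullPCD both resolve to the ScrewFullPPO training config.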
8 | env: 9 | numEnvs: ${resolve_default:768,${...num_envs}} 10 | episodeLength: 200 11 | 12 | furniture: "square_table" 13 | 14 | clipObservations: 5.0 15 | clipActions: 1.0 16 | 17 | frankaDofNoise: 0.25 18 | 19 | screwReward: 0.1 20 | eefDeviatePenalty: 1e-2 21 | successReward: 100.0 22 | failurePenalty: 0.0 23 | initialQ7NoiseLevel: 0.5 # +- 50% noise range 24 | initialQ1toQ6NoiseLevel: 0.02 # +- 2% noise range 25 | 26 | aggregateMode: 3 27 | 28 | actionScale: 1.0 29 | useQuatRot: false 30 | 31 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 32 | 33 | # for distillation 34 | propDumpInfo: 35 | q: 7 36 | eef_pos: 3 37 | eef_quat: 4 38 | gripper_width: 1 39 | 40 | propObsDim: 29 41 | obsKeys: 42 | - q 43 | - cos_q 44 | - sin_q 45 | - eef_pos 46 | - eef_quat 47 | - gripper_width 48 | 49 | privilegedObsDim: 74 50 | privilegedObsKeys: 51 | - square_table_top_pos 52 | - square_table_top_rot 53 | - square_table_top_vel 54 | - square_table_leg4_pos 55 | - square_table_leg4_rot 56 | - square_table_leg4_vel 57 | - eef_vel 58 | - ftip_center_pos 59 | - obstacle_front_pos 60 | - obstacle_left_pos 61 | - obstacle_right_pos 62 | - front_wall_cf 63 | - left_wall_cf 64 | - right_wall_cf 65 | - square_table_top_cf 66 | - square_table_leg4_cf 67 | - eef_lf_pos 68 | - eef_rf_pos 69 | - dq 70 | 71 | # set to True if you use camera sensors in the environment 72 | enableCameraSensors: False 73 | 74 | sim: 75 | dt: 0.01667 # 1/60 76 | substeps: 2 77 | up_axis: "z" 78 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 79 | gravity: [0.0, 0.0, -9.81] 80 | physx: 81 | num_threads: ${....num_threads} 82 | solver_type: ${....solver_type} 83 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 84 | num_position_iterations: 8 85 | num_velocity_iterations: 1 86 | contact_offset: 0.005 87 | rest_offset: 0.0 88 | bounce_threshold_velocity: 0.2 89 | max_depenetration_velocity: 1000.0 90 | default_buffer_size_multiplier: 5.0 91 | max_gpu_contact_pairs: 1048576 # 1024*1024 92 | num_subscenes: ${....num_subscenes} 93 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
94 | 95 | task: 96 | randomize: True 97 | randomization_params: 98 | frequency: 1 99 | sim_params: 100 | gravity: 101 | range: [ 0, 0.4 ] 102 | operation: "additive" 103 | distribution: "uniform" 104 | schedule: "linear" 105 | schedule_steps: 100000000 106 | actor_params: 107 | franka: 108 | color: True 109 | rigid_body_properties: 110 | mass: 111 | range: [0.5, 1.5] 112 | operation: "scaling" 113 | distribution: "uniform" 114 | setup_only: True 115 | schedule: "linear" 116 | schedule_steps: 100000000 117 | rigid_shape_properties: 118 | friction: 119 | num_buckets: 250 120 | range: [ 0.7, 1.3 ] 121 | operation: "scaling" 122 | distribution: "uniform" 123 | schedule: "linear" 124 | schedule_steps: 100000000 125 | dof_properties: 126 | lower: 127 | range: [ 1.0, 1.010050167084168 ] 128 | operation: "scaling" 129 | distribution: "loguniform" 130 | schedule: "linear" 131 | schedule_steps: 100000000 132 | upper: 133 | range: [ 1.0, 1.010050167084168 ] 134 | operation: "scaling" 135 | distribution: "loguniform" 136 | schedule: "linear" 137 | schedule_steps: 100000000 138 | stiffness: 139 | range: [ 1.0, 1.010050167084168 ] 140 | operation: "scaling" 141 | distribution: "loguniform" 142 | schedule: "linear" 143 | schedule_steps: 100000000 144 | damping: 145 | range: [ 1.0, 1.010050167084168 ] 146 | operation: "scaling" 147 | distribution: "loguniform" 148 | schedule: "linear" 149 | schedule_steps: 100000000 150 | table: 151 | color: True 152 | rigid_shape_properties: 153 | friction: 154 | num_buckets: 250 155 | range: [ 0.5, 1.5 ] 156 | operation: "scaling" 157 | distribution: "uniform" 158 | schedule: "linear" 159 | schedule_steps: 100000000 160 | square_table_leg4: 161 | color: True 162 | rigid_body_properties: 163 | mass: 164 | range: [ 0.5, 1.5 ] 165 | operation: "scaling" 166 | distribution: "uniform" 167 | setup_only: True 168 | schedule: "linear" 169 | schedule_steps: 100000000 170 | rigid_shape_properties: 171 | friction: 172 | num_buckets: 250 173 | range: [ 0.5, 1.5 ] 174 | operation: "scaling" 175 | distribution: "uniform" 176 | schedule: "linear" 177 | schedule_steps: 100000000 178 | rolling_friction: 179 | num_buckets: 250 180 | range: [ 0.5, 1.5 ] 181 | operation: "scaling" 182 | distribution: "uniform" 183 | schedule: "linear" 184 | schedule_steps: 100000000 185 | torsion_friction: 186 | num_buckets: 250 187 | range: [ 0.5, 1.5 ] 188 | operation: "scaling" 189 | distribution: "uniform" 190 | schedule: "linear" 191 | schedule_steps: 100000000 192 | restitution: 193 | range: [0.0, 1.0] 194 | operation: "additive" 195 | distribution: "uniform" 196 | schedule: "linear" 197 | schedule_steps: 100000000 198 | compliance: 199 | range: [0.0, 1.0] 200 | operation: "additive" 201 | distribution: "uniform" 202 | schedule: "linear" 203 | schedule_steps: 100000000 204 | square_table_top: 205 | color: True 206 | rigid_body_properties: 207 | mass: 208 | range: [ 0.5, 1.5 ] 209 | operation: "scaling" 210 | distribution: "uniform" 211 | setup_only: True 212 | schedule: "linear" 213 | schedule_steps: 100000000 214 | rigid_shape_properties: 215 | friction: 216 | num_buckets: 250 217 | range: [ 0.5, 1.5 ] 218 | operation: "scaling" 219 | distribution: "uniform" 220 | schedule: "linear" 221 | schedule_steps: 100000000 222 | rolling_friction: 223 | num_buckets: 250 224 | range: [ 0.5, 1.5 ] 225 | operation: "scaling" 226 | distribution: "uniform" 227 | schedule: "linear" 228 | schedule_steps: 100000000 229 | torsion_friction: 230 | num_buckets: 250 231 | range: [ 0.5, 1.5 ] 232 | operation: "scaling" 233 
| distribution: "uniform" 234 | schedule: "linear" 235 | schedule_steps: 100000000 236 | restitution: 237 | range: [0.0, 1.0] 238 | operation: "additive" 239 | distribution: "uniform" 240 | schedule: "linear" 241 | schedule_steps: 100000000 242 | compliance: 243 | range: [0.0, 1.0] 244 | operation: "additive" 245 | distribution: "uniform" 246 | schedule: "linear" 247 | schedule_steps: 100000000 -------------------------------------------------------------------------------- /main/cfg/task/ScrewFullPCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ScrewFullPCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:2048,${...num_envs}} 10 | episodeLength: 200 11 | 12 | furniture: "square_table" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.25 21 | 22 | screwReward: 0.1 23 | eefDeviatePenalty: 1e-2 24 | successReward: 100.0 25 | failurePenalty: 0.0 26 | initialQ7NoiseLevel: 0.5 # +- 50% noise range 27 | initialQ1toQ6NoiseLevel: 0.02 # +- 2% noise range 28 | 29 | aggregateMode: 3 30 | 31 | actionScale: 1.0 32 | useQuatRot: false 33 | 34 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 35 | 36 | pcAugmentation: 37 | enabled: true 38 | applyP: 0.4 39 | randomTransLow: [-0.04, -0.04, -0.04] 40 | randomTransHigh: [0.04, 0.04, 0.04] 41 | 42 | jitterRatio: 0.1 43 | jitterSigma: 0.01 44 | jitterHigh: 0.015 45 | jitterLow: -0.015 46 | 47 | propObsDim: 29 48 | obsKeys: 49 | - q 50 | - cos_q 51 | - sin_q 52 | - eef_pos 53 | - eef_quat 54 | - gripper_width 55 | 56 | privilegedObsDim: 74 57 | privilegedObsKeys: 58 | - square_table_top_pos 59 | - square_table_top_rot 60 | - square_table_top_vel 61 | - square_table_leg4_pos 62 | - square_table_leg4_rot 63 | - square_table_leg4_vel 64 | - eef_vel 65 | - ftip_center_pos 66 | - obstacle_front_pos 67 | - obstacle_left_pos 68 | - obstacle_right_pos 69 | - front_wall_cf 70 | - left_wall_cf 71 | - right_wall_cf 72 | - square_table_top_cf 73 | - square_table_leg4_cf 74 | - eef_lf_pos 75 | - eef_rf_pos 76 | - dq 77 | 78 | # set to True if you use camera sensors in the environment 79 | enableCameraSensors: False 80 | 81 | sim: 82 | dt: 0.01667 # 1/60 83 | substeps: 2 84 | up_axis: "z" 85 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 86 | gravity: [0.0, 0.0, -9.81] 87 | physx: 88 | num_threads: ${....num_threads} 89 | solver_type: ${....solver_type} 90 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 91 | num_position_iterations: 8 92 | num_velocity_iterations: 1 93 | contact_offset: 0.005 94 | rest_offset: 0.0 95 | bounce_threshold_velocity: 0.2 96 | max_depenetration_velocity: 1000.0 97 | default_buffer_size_multiplier: 5.0 98 | max_gpu_contact_pairs: 1048576 # 1024*1024 99 | num_subscenes: ${....num_subscenes} 100 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
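# NOTE (annotation): the state-based configs in this section (e.g. ScrewFull above)
# carry a `propDumpInfo` section marked "for distillation", listing proprioceptive
# fields and their dimensions to dump during teacher rollouts; the PCD variants omit
# it, consistent with their role as the student-side observation space.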
101 | 102 | task: 103 | randomize: False 104 | -------------------------------------------------------------------------------- /main/cfg/task/ScrewSingle.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ScrewSingle 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:8192,${...num_envs}} 10 | episodeLength: 200 11 | 12 | furniture: "table_with_one_leg" 13 | 14 | clipObservations: 5.0 15 | clipActions: 1.0 16 | 17 | frankaDofNoise: 0.25 18 | 19 | screwReward: 0.1 20 | eefDeviatePenalty: 1e-2 21 | successReward: 100.0 22 | failurePenalty: 0.0 23 | 24 | initialQ7NoiseLevel: 0.5 # +- 50% noise range 25 | initialQ1toQ6NoiseLevel: 0.02 # +- 2% noise range 26 | 27 | aggregateMode: 3 28 | 29 | actionScale: 1.0 30 | useQuatRot: false 31 | 32 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 33 | 34 | # for distillation 35 | propDumpInfo: 36 | q: 7 37 | eef_pos: 3 38 | eef_quat: 4 39 | gripper_width: 1 40 | 41 | propObsDim: 29 42 | obsKeys: 43 | - q 44 | - cos_q 45 | - sin_q 46 | - eef_pos 47 | - eef_quat 48 | - gripper_width 49 | 50 | privilegedObsDim: 74 51 | privilegedObsKeys: 52 | - square_table_top_pos 53 | - square_table_top_rot 54 | - square_table_top_vel 55 | - square_table_leg4_pos 56 | - square_table_leg4_rot 57 | - square_table_leg4_vel 58 | - eef_vel 59 | - ftip_center_pos 60 | - obstacle_front_pos 61 | - obstacle_left_pos 62 | - obstacle_right_pos 63 | - front_wall_cf 64 | - left_wall_cf 65 | - right_wall_cf 66 | - square_table_top_cf 67 | - square_table_leg4_cf 68 | - eef_lf_pos 69 | - eef_rf_pos 70 | - dq 71 | 72 | # set to True if you use camera sensors in the environment 73 | enableCameraSensors: False 74 | 75 | sim: 76 | dt: 0.01667 # 1/60 77 | substeps: 2 78 | up_axis: "z" 79 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 80 | gravity: [0.0, 0.0, -9.81] 81 | physx: 82 | num_threads: ${....num_threads} 83 | solver_type: ${....solver_type} 84 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 85 | num_position_iterations: 8 86 | num_velocity_iterations: 1 87 | contact_offset: 0.005 88 | rest_offset: 0.0 89 | bounce_threshold_velocity: 0.2 90 | max_depenetration_velocity: 1000.0 91 | default_buffer_size_multiplier: 5.0 92 | max_gpu_contact_pairs: 1048576 # 1024*1024 93 | num_subscenes: ${....num_subscenes} 94 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
95 | 96 | task: 97 | randomize: True 98 | randomization_params: 99 | frequency: 1 100 | sim_params: 101 | gravity: 102 | range: [ 0, 0.4 ] 103 | operation: "additive" 104 | distribution: "uniform" 105 | schedule: "linear" 106 | schedule_steps: 100000000 107 | actor_params: 108 | franka: 109 | color: True 110 | rigid_body_properties: 111 | mass: 112 | range: [0.5, 1.5] 113 | operation: "scaling" 114 | distribution: "uniform" 115 | setup_only: True 116 | schedule: "linear" 117 | schedule_steps: 100000000 118 | rigid_shape_properties: 119 | friction: 120 | num_buckets: 250 121 | range: [ 0.7, 1.3 ] 122 | operation: "scaling" 123 | distribution: "uniform" 124 | schedule: "linear" 125 | schedule_steps: 100000000 126 | dof_properties: 127 | lower: 128 | range: [ 1.0, 1.010050167084168 ] 129 | operation: "scaling" 130 | distribution: "loguniform" 131 | schedule: "linear" 132 | schedule_steps: 100000000 133 | upper: 134 | range: [ 1.0, 1.010050167084168 ] 135 | operation: "scaling" 136 | distribution: "loguniform" 137 | schedule: "linear" 138 | schedule_steps: 100000000 139 | stiffness: 140 | range: [ 1.0, 1.010050167084168 ] 141 | operation: "scaling" 142 | distribution: "loguniform" 143 | schedule: "linear" 144 | schedule_steps: 100000000 145 | damping: 146 | range: [ 1.0, 1.010050167084168 ] 147 | operation: "scaling" 148 | distribution: "loguniform" 149 | schedule: "linear" 150 | schedule_steps: 100000000 151 | table: 152 | color: True 153 | rigid_shape_properties: 154 | friction: 155 | num_buckets: 250 156 | range: [ 0.5, 1.5 ] 157 | operation: "scaling" 158 | distribution: "uniform" 159 | schedule: "linear" 160 | schedule_steps: 100000000 161 | square_table_leg4: 162 | color: True 163 | rigid_body_properties: 164 | mass: 165 | range: [ 0.5, 1.5 ] 166 | operation: "scaling" 167 | distribution: "uniform" 168 | setup_only: True 169 | schedule: "linear" 170 | schedule_steps: 100000000 171 | rigid_shape_properties: 172 | friction: 173 | num_buckets: 250 174 | range: [ 0.5, 1.5 ] 175 | operation: "scaling" 176 | distribution: "uniform" 177 | schedule: "linear" 178 | schedule_steps: 100000000 179 | rolling_friction: 180 | num_buckets: 250 181 | range: [ 0.5, 1.5 ] 182 | operation: "scaling" 183 | distribution: "uniform" 184 | schedule: "linear" 185 | schedule_steps: 100000000 186 | torsion_friction: 187 | num_buckets: 250 188 | range: [ 0.5, 1.5 ] 189 | operation: "scaling" 190 | distribution: "uniform" 191 | schedule: "linear" 192 | schedule_steps: 100000000 193 | restitution: 194 | range: [0.0, 1.0] 195 | operation: "additive" 196 | distribution: "uniform" 197 | schedule: "linear" 198 | schedule_steps: 100000000 199 | compliance: 200 | range: [0.0, 1.0] 201 | operation: "additive" 202 | distribution: "uniform" 203 | schedule: "linear" 204 | schedule_steps: 100000000 205 | square_table_top: 206 | color: True 207 | rigid_body_properties: 208 | mass: 209 | range: [ 0.5, 1.5 ] 210 | operation: "scaling" 211 | distribution: "uniform" 212 | setup_only: True 213 | schedule: "linear" 214 | schedule_steps: 100000000 215 | rigid_shape_properties: 216 | friction: 217 | num_buckets: 250 218 | range: [ 0.5, 1.5 ] 219 | operation: "scaling" 220 | distribution: "uniform" 221 | schedule: "linear" 222 | schedule_steps: 100000000 223 | rolling_friction: 224 | num_buckets: 250 225 | range: [ 0.5, 1.5 ] 226 | operation: "scaling" 227 | distribution: "uniform" 228 | schedule: "linear" 229 | schedule_steps: 100000000 230 | torsion_friction: 231 | num_buckets: 250 232 | range: [ 0.5, 1.5 ] 233 | operation: "scaling" 
234 | distribution: "uniform" 235 | schedule: "linear" 236 | schedule_steps: 100000000 237 | restitution: 238 | range: [0.0, 1.0] 239 | operation: "additive" 240 | distribution: "uniform" 241 | schedule: "linear" 242 | schedule_steps: 100000000 243 | compliance: 244 | range: [0.0, 1.0] 245 | operation: "additive" 246 | distribution: "uniform" 247 | schedule: "linear" 248 | schedule_steps: 100000000 -------------------------------------------------------------------------------- /main/cfg/task/ScrewSinglePCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: ScrewSinglePCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:2048,${...num_envs}} 10 | episodeLength: 200 11 | 12 | furniture: "table_with_one_leg" 13 | 14 | pcdMaskRatio: null 15 | pcdN: 768 16 | 17 | clipObservations: 5.0 18 | clipActions: 1.0 19 | 20 | frankaDofNoise: 0.25 21 | 22 | screwReward: 0.1 23 | eefDeviatePenalty: 1e-2 24 | successReward: 100.0 25 | failurePenalty: 0.0 26 | initialQ7NoiseLevel: 0.5 # +- 50% noise range 27 | initialQ1toQ6NoiseLevel: 0.02 # +- 2% noise range 28 | 29 | aggregateMode: 3 30 | 31 | actionScale: 1.0 32 | useQuatRot: false 33 | 34 | frankaDefaultDofPos: [-0.0186, -0.1682, 0.0344, -2.6468, 0.0269, 2.5371, 0.7211, 0.0145, 0.0145] 35 | 36 | pcAugmentation: 37 | enabled: true 38 | applyP: 0.4 39 | randomTransLow: [-0.04, -0.04, -0.04] 40 | randomTransHigh: [0.04, 0.04, 0.04] 41 | 42 | jitterRatio: 0.1 43 | jitterSigma: 0.01 44 | jitterHigh: 0.015 45 | jitterLow: -0.015 46 | 47 | propObsDim: 29 48 | obsKeys: 49 | - q 50 | - cos_q 51 | - sin_q 52 | - eef_pos 53 | - eef_quat 54 | - gripper_width 55 | 56 | privilegedObsDim: 74 57 | privilegedObsKeys: 58 | - square_table_top_pos 59 | - square_table_top_rot 60 | - square_table_top_vel 61 | - square_table_leg4_pos 62 | - square_table_leg4_rot 63 | - square_table_leg4_vel 64 | - eef_vel 65 | - ftip_center_pos 66 | - obstacle_front_pos 67 | - obstacle_left_pos 68 | - obstacle_right_pos 69 | - front_wall_cf 70 | - left_wall_cf 71 | - right_wall_cf 72 | - square_table_top_cf 73 | - square_table_leg4_cf 74 | - eef_lf_pos 75 | - eef_rf_pos 76 | - dq 77 | 78 | # set to True if you use camera sensors in the environment 79 | enableCameraSensors: False 80 | 81 | sim: 82 | dt: 0.01667 # 1/60 83 | substeps: 2 84 | up_axis: "z" 85 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 86 | gravity: [0.0, 0.0, -9.81] 87 | physx: 88 | num_threads: ${....num_threads} 89 | solver_type: ${....solver_type} 90 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 91 | num_position_iterations: 8 92 | num_velocity_iterations: 1 93 | contact_offset: 0.005 94 | rest_offset: 0.0 95 | bounce_threshold_velocity: 0.2 96 | max_depenetration_velocity: 1000.0 97 | default_buffer_size_multiplier: 5.0 98 | max_gpu_contact_pairs: 1048576 # 1024*1024 99 | num_subscenes: ${....num_subscenes} 100 | contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
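# NOTE (annotation): unlike its state-based counterpart ScrewSingle above
# (`randomize: True` with full physics randomization), this PCD variant keeps physics
# randomization off; observation-side variation comes from the `pcAugmentation` block.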
101 | 102 | task: 103 | randomize: False 104 | -------------------------------------------------------------------------------- /main/cfg/task/StabilizePCD.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: StabilizePCD 3 | 4 | physics_engine: ${..physics_engine} 5 | seed: ${..seed} 6 | 7 | # if given, will override the device setting in gym. 8 | env: 9 | numEnvs: ${resolve_default:768,${...num_envs}} 10 | episodeLength: 100 11 | 12 | furniture: "one_leg" 13 | 14 | successWeight: 10.0 15 | failureWeight: 0.0 16 | qdPenalty: 1e-3 17 | actionPenalty: 1e-5 18 | 19 | pcdMaskRatio: null 20 | pcdN: 768 21 | 22 | clipObservations: 5.0 23 | clipActions: 1.0 24 | 25 | frankaDofNoise: 0.25 26 | 27 | aggregateMode: 3 28 | 29 | actionScale: 1.0 30 | useQuatRot: false 31 | 32 | frankaDefaultDofPos: [0.12162008114028396, -0.19826458111314524, -0.01990020486871322, -2.4732269941140346, -0.01307073642274261, 2.30396583422025, 0.8480939705504309, 0.035, 0.035] 33 | 34 | pcAugmentation: 35 | enabled: true 36 | applyP: 0.4 37 | randomTransLow: [-0.04, -0.04, -0.04] 38 | randomTransHigh: [0.04, 0.04, 0.04] 39 | 40 | jitterRatio: 0.1 41 | jitterSigma: 0.01 42 | jitterHigh: 0.015 43 | jitterLow: -0.015 44 | 45 | propObsDim: 29 46 | obsKeys: 47 | - q 48 | - cos_q 49 | - sin_q 50 | - eef_pos 51 | - eef_quat 52 | - gripper_width 53 | 54 | privilegedObsDim: 145 55 | privilegedObsKeys: 56 | - square_table_top_pos 57 | - square_table_top_rot 58 | - square_table_top_vel 59 | - square_table_leg1_pos 60 | - square_table_leg1_rot 61 | - square_table_leg1_vel 62 | - square_table_leg2_pos 63 | - square_table_leg2_rot 64 | - square_table_leg2_vel 65 | - square_table_leg3_pos 66 | - square_table_leg3_rot 67 | - square_table_leg3_vel 68 | - square_table_leg4_pos 69 | - square_table_leg4_rot 70 | - square_table_leg4_vel 71 | - obstacle_front_pos 72 | - obstacle_left_pos 73 | - obstacle_right_pos 74 | - front_wall_cf 75 | - left_wall_cf 76 | - square_table_top_cf 77 | - square_table_leg1_cf 78 | - square_table_leg2_cf 79 | - square_table_leg3_cf 80 | - square_table_leg4_cf 81 | - eef_vel 82 | - eef_lf_pos 83 | - eef_rf_pos 84 | - q 85 | - cos_q 86 | - sin_q 87 | - dq 88 | - q_gripper 89 | 90 | # set to True if you use camera sensors in the environment 91 | enableCameraSensors: False 92 | 93 | sim: 94 | dt: 0.01667 # 1/60 95 | substeps: 2 96 | up_axis: "z" 97 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 98 | gravity: [0.0, 0.0, -9.81] 99 | physx: 100 | num_threads: ${....num_threads} 101 | solver_type: ${....solver_type} 102 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 103 | num_position_iterations: 8 104 | num_velocity_iterations: 1 105 | contact_offset: 0.005 106 | rest_offset: 0.0 107 | bounce_threshold_velocity: 0.2 108 | max_depenetration_velocity: 1000.0 109 | default_buffer_size_multiplier: 5.0 110 | max_gpu_contact_pairs: 1048576 # 1024*1024 111 | num_subscenes: ${....num_subscenes} 112 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 
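# NOTE (annotation): Stabilize is the only task config above that sets
# `contact_collection: 1` (CC_LAST_SUBSTEP); all the others use 0 (CC_NEVER), i.e.
# no contact information is collected for reporting.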
113 | 114 | task: 115 | randomize: False 116 | -------------------------------------------------------------------------------- /main/distillation/test.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | import hydra 3 | import transic 4 | from transic.utils.utils import set_seed 5 | from transic.distillation.data.data_module import DummyDataset 6 | from transic.utils.config_utils import omegaconf_to_dict 7 | 8 | import cv2 9 | import numpy as np 10 | 11 | cv2.imshow("dummy", np.zeros((1, 1, 3), dtype=np.uint8)) 12 | cv2.waitKey(1) 13 | 14 | 15 | @hydra.main(config_name="distillation_config", config_path="../cfg", version_base="1.1") 16 | def main(cfg): 17 | cfg.seed = set_seed(cfg.seed) 18 | 19 | if cfg.test.ckpt_path is None: 20 | print( 21 | "[WARNING] No ckpt_path is provided, will test with random weights. Press enter to continue." 22 | ) 23 | input() 24 | 25 | from transic.learn.lightning import LightingTrainer 26 | 27 | trainer_ = LightingTrainer(cfg) 28 | trainer_.trainer.loggers[-1].log_hyperparams(omegaconf_to_dict(cfg)) 29 | trainer_.trainer.test( 30 | model=trainer_.module, 31 | dataloaders=DummyDataset(batch_size=1, epoch_len=1).get_dataloader(), 32 | ckpt_path=cfg.test.ckpt_path, 33 | ) 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /main/distillation/train.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | import hydra 3 | import transic 4 | from transic.utils.utils import set_seed 5 | from transic.learn.lightning import LightingTrainer 6 | from transic.utils.config_utils import omegaconf_to_dict 7 | 8 | 9 | @hydra.main(config_name="distillation_config", config_path="../cfg", version_base="1.1") 10 | def main(cfg): 11 | cfg.seed = set_seed(cfg.seed) 12 | trainer_ = LightingTrainer(cfg) 13 | trainer_.trainer.loggers[-1].log_hyperparams(omegaconf_to_dict(cfg)) 14 | trainer_.fit() 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /main/residual/train.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | import hydra 3 | import transic 4 | from transic.utils.utils import set_seed 5 | from transic.learn.lightning import LightingTrainer 6 | from transic.utils.config_utils import omegaconf_to_dict 7 | 8 | 9 | @hydra.main(config_name="residual_config", config_path="../cfg", version_base="1.1") 10 | def main(cfg): 11 | cfg.seed = set_seed(cfg.seed) 12 | trainer_ = LightingTrainer(cfg) 13 | trainer_.trainer.loggers[-1].log_hyperparams(omegaconf_to_dict(cfg)) 14 | trainer_.fit() 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /main/rl/train.py: -------------------------------------------------------------------------------- 1 | # train.py 2 | # Script to train policies in Isaac Gym 3 | # 4 | # Copyright (c) 2018-2023, NVIDIA Corporation 5 | # All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions are met: 9 | # 10 | # 1. Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | # 13 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | # 17 | # 3. Neither the name of the copyright holder nor the names of its 18 | # contributors may be used to endorse or promote products derived from 19 | # this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | 32 | import hydra 33 | 34 | from omegaconf import DictConfig, OmegaConf 35 | import transic 36 | 37 | 38 | def preprocess_train_config(cfg, config_dict): 39 | """ 40 | Adding common configuration parameters to the rl_games train config. 41 | An alternative to this is inferring them in task-specific .yaml files, but that requires repeating the same 42 | variable interpolations in each config. 43 | """ 44 | 45 | train_cfg = config_dict["params"]["config"] 46 | 47 | train_cfg["device"] = cfg.rl_device 48 | 49 | train_cfg["population_based_training"] = False 50 | train_cfg["pbt_idx"] = None 51 | 52 | train_cfg["full_experiment_name"] = cfg.get("full_experiment_name") 53 | 54 | print(f"Using rl_device: {cfg.rl_device}") 55 | print(f"Using sim_device: {cfg.sim_device}") 56 | print(train_cfg) 57 | 58 | try: 59 | model_size_multiplier = config_dict["params"]["network"]["mlp"][ 60 | "model_size_multiplier" 61 | ] 62 | if model_size_multiplier != 1: 63 | units = config_dict["params"]["network"]["mlp"]["units"] 64 | for i, u in enumerate(units): 65 | units[i] = u * model_size_multiplier 66 | print( 67 | f'Modified MLP units by x{model_size_multiplier} to {config_dict["params"]["network"]["mlp"]["units"]}' 68 | ) 69 | except KeyError: 70 | pass 71 | 72 | return config_dict 73 | 74 | 75 | @hydra.main(version_base="1.1", config_name="config", config_path="../cfg") 76 | def launch_rlg_hydra(cfg: DictConfig): 77 | import os 78 | from datetime import datetime 79 | 80 | import isaacgym 81 | from hydra.utils import to_absolute_path 82 | 83 | if cfg.display: 84 | import cv2 85 | import numpy as np 86 | 87 | cv2.imshow("dummy", np.zeros((1, 1, 3), dtype=np.uint8)) 88 | cv2.waitKey(1) 89 | 90 | import transic_envs 91 | from transic.utils.reformat import omegaconf_to_dict, print_dict 92 | from transic.utils.utils import set_np_formatting, set_seed 93 | 94 | from transic.utils.rlgames_utils import ( 95 | RLGPUAlgoObserver, 96 | MultiObserver, 97 | ComplexObsRLGPUEnv, 98 | ) 99 | from transic.utils.wandb_utils import WandbAlgoObserver 100 | from rl_games.common import env_configurations, vecenv 101 | from transic.rl.runner import Runner 102 | from transic.rl.network_builder import DictObsBuilder 103 | from transic.rl.models import ModelA2CContinuousLogStd 104 | 
from rl_games.algos_torch.model_builder import register_network, register_model 105 | from transic.utils.wandb_utils import WandbVideoCaptureWrapper 106 | 107 | register_model("my_continuous_a2c_logstd", ModelA2CContinuousLogStd) 108 | register_network("dict_obs_actor_critic", DictObsBuilder) 109 | 110 | # ensure checkpoints can be specified as relative paths 111 | if cfg.checkpoint: 112 | cfg.checkpoint = to_absolute_path(cfg.checkpoint) 113 | 114 | cfg_dict = omegaconf_to_dict(cfg) 115 | print_dict(cfg_dict) 116 | 117 | # set numpy formatting for printing only 118 | set_np_formatting() 119 | 120 | # global rank of the GPU 121 | global_rank = int(os.getenv("RANK", "0")) 122 | 123 | # sets seed. if seed is -1 will pick a random one 124 | cfg.seed = set_seed( 125 | cfg.seed, torch_deterministic=cfg.torch_deterministic, rank=global_rank 126 | ) 127 | 128 | def create_isaacgym_env(): 129 | kwargs = dict( 130 | sim_device=cfg.sim_device, 131 | rl_device=cfg.rl_device, 132 | graphics_device_id=cfg.graphics_device_id, 133 | multi_gpu=cfg.multi_gpu, 134 | cfg=cfg.task, 135 | display=cfg.display, 136 | record=cfg.capture_video, 137 | has_headless_arg=False, 138 | ) 139 | if not cfg.headless: 140 | assert ( 141 | "pcd" not in cfg.task_name.lower() 142 | ), "TODO: add GUI support for PCD tasks" 143 | if "pcd" not in cfg.task_name.lower(): 144 | kwargs["headless"] = cfg.headless 145 | kwargs["has_headless_arg"] = True 146 | envs = transic_envs.make(**kwargs) 147 | if cfg.capture_video: 148 | envs.is_vector_env = True 149 | envs = WandbVideoCaptureWrapper( 150 | envs, 151 | n_parallel_recorders=cfg.n_parallel_recorders, 152 | n_successful_videos_to_record=cfg.n_successful_videos_to_record, 153 | ) 154 | return envs 155 | 156 | env_configurations.register( 157 | "rlgpu", 158 | { 159 | "vecenv_type": "RLGPU", 160 | "env_creator": create_isaacgym_env, 161 | }, 162 | ) 163 | 164 | obs_spec = {} 165 | if "central_value_config" in cfg.rl_train.params.config: 166 | critic_net_cfg = cfg.rl_train.params.config.central_value_config.network 167 | obs_spec["states"] = { 168 | "names": list(critic_net_cfg.inputs.keys()), 169 | "concat": not critic_net_cfg.name == "complex_net", 170 | "space_name": "state_space", 171 | } 172 | 173 | vecenv.register( 174 | "RLGPU", lambda config_name, num_actors: ComplexObsRLGPUEnv(config_name) 175 | ) 176 | 177 | rlg_config_dict = omegaconf_to_dict(cfg.rl_train) 178 | rlg_config_dict = preprocess_train_config(cfg, rlg_config_dict) 179 | 180 | observers = [RLGPUAlgoObserver()] 181 | 182 | if cfg.wandb_activate: 183 | cfg.seed += global_rank 184 | if global_rank == 0: 185 | # initialize wandb only once per multi-gpu run 186 | wandb_observer = WandbAlgoObserver(cfg) 187 | observers.append(wandb_observer) 188 | 189 | def build_runner(algo_observer): 190 | runner = Runner(algo_observer) 191 | return runner 192 | 193 | # convert CLI arguments into dictionary 194 | # create runner and set the settings 195 | runner = build_runner(MultiObserver(observers)) 196 | runner.load(rlg_config_dict) 197 | runner.reset() 198 | 199 | # dump config dict 200 | if cfg.test: 201 | prefix = "dump_" if cfg.save_rollouts else "test_" 202 | experiment_dir = os.path.join( 203 | "runs", 204 | prefix 205 | + cfg.rl_train.params.config.name 206 | + "_{date:%m-%d-%H-%M-%S}".format(date=datetime.now()), 207 | ) 208 | else: 209 | experiment_dir = os.path.join( 210 | "runs", 211 | cfg.rl_train.params.config.name 212 | + "_{date:%m-%d-%H-%M-%S}".format(date=datetime.now()), 213 | ) 214 | os.makedirs(experiment_dir, 
exist_ok=True) 215 | with open(os.path.join(experiment_dir, "config.yaml"), "w") as f: 216 | f.write(OmegaConf.to_yaml(cfg)) 217 | 218 | runner.run( 219 | { 220 | "train": not cfg.test, 221 | "play": cfg.test, 222 | "checkpoint": cfg.checkpoint, 223 | "from_ckpt_epoch": cfg.from_ckpt_epoch, 224 | "sigma": cfg.sigma if cfg.sigma != "" else None, 225 | "save_rollouts": { 226 | "save_rollouts": cfg.save_rollouts, 227 | "rollout_saving_fpath": os.path.join(experiment_dir, "rollouts.hdf5"), 228 | "save_successful_rollouts_only": cfg.save_successful_rollouts_only, 229 | "num_rollouts_to_save": cfg.num_rollouts_to_save, 230 | "min_episode_length": cfg.min_episode_length, 231 | }, 232 | } 233 | ) 234 | 235 | 236 | if __name__ == "__main__": 237 | launch_rlg_hydra() 238 | -------------------------------------------------------------------------------- /media/SUSig-red.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/media/SUSig-red.png -------------------------------------------------------------------------------- /media/method_overview.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/media/method_overview.gif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | rl-games==1.6.1 2 | gym==0.23.1 3 | hydra-core 4 | h5py 5 | dm_tree 6 | einops 7 | pytorch_lightning -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import pkg_resources 4 | from setuptools import setup, find_packages 5 | 6 | PKG_NAME = "transic" 7 | VERSION = "0.0.1" 8 | EXTRAS = {} 9 | 10 | 11 | def _read_file(fname): 12 | with pathlib.Path(fname).open() as fp: 13 | return fp.read() 14 | 15 | 16 | def _read_install_requires(): 17 | with pathlib.Path("requirements.txt").open() as fp: 18 | return [ 19 | str(requirement) for requirement in pkg_resources.parse_requirements(fp) 20 | ] 21 | 22 | 23 | setup( 24 | name=PKG_NAME, 25 | version=VERSION, 26 | author="TRANSIC Developers", 27 | description="research project", 28 | long_description=_read_file("README.md"), 29 | long_description_content_type="text/markdown", 30 | keywords=["Robotics", "Reinforcement Learning", "Machine Learning"], 31 | license="Apache License, Version 2.0", 32 | packages=find_packages(include=f"{PKG_NAME}.*"), 33 | include_package_data=True, 34 | zip_safe=False, 35 | entry_points={"console_scripts": []}, 36 | install_requires=_read_install_requires(), 37 | python_requires="==3.8.*", 38 | classifiers=[ 39 | "Development Status :: 3 - Alpha", 40 | "Topic :: Scientific/Engineering :: Robotics", 41 | "Environment :: Console", 42 | "Programming Language :: Python :: 3", 43 | ], 44 | ) 45 | -------------------------------------------------------------------------------- /transic/__init__.py: -------------------------------------------------------------------------------- 1 | from omegaconf import DictConfig, OmegaConf 2 | 3 | 4 | def _is_cuda_solver(x, y): 5 | if isinstance(y, int): 6 | return y >= 0 7 | if isinstance(y, str): 8 | if "cuda" in y.lower(): 9 | return True 10 | else: 11 | return x.lower() in y.lower() 12 | 13 | 14 | 
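# NOTE (annotation): the resolvers registered below back the `${eq:...}`,
# `${contains:...}`, `${if:...}`, `${resolve_default:...}`, and
# `${find_rl_train_config:...}` interpolations used throughout the YAML configs in
# main/cfg. Registration happens at import time, which is why every entry script does
# `import transic` before Hydra composes its config. For example,
# `${contains:"cuda",${....sim_device}}` evaluates _is_cuda_solver("cuda", "cuda:0")
# and returns True.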
OmegaConf.register_new_resolver("eq", lambda x, y: x.lower() == y.lower()) 15 | OmegaConf.register_new_resolver("contains", _is_cuda_solver) 16 | OmegaConf.register_new_resolver("if", lambda pred, a, b: a if pred else b) 17 | OmegaConf.register_new_resolver( 18 | "resolve_default", lambda default, arg: default if arg == "" else arg 19 | ) 20 | OmegaConf.register_new_resolver("multiply", lambda x, y: x * y) 21 | OmegaConf.register_new_resolver("floor_divide", lambda x, y: x // y) 22 | OmegaConf.register_new_resolver( 23 | "find_rl_train_config", 24 | lambda x: x + "PPO" if x[-3:] != "PCD" else x[:-3] + "PPO", 25 | ) 26 | -------------------------------------------------------------------------------- /transic/distillation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/distillation/__init__.py -------------------------------------------------------------------------------- /transic/distillation/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_module import DistillationDataModule 2 | -------------------------------------------------------------------------------- /transic/distillation/data/collate.py: -------------------------------------------------------------------------------- 1 | from math import ceil 2 | from copy import deepcopy 3 | 4 | import numpy as np 5 | 6 | from transic.utils.array import ( 7 | any_slice, 8 | nested_np_split, 9 | get_batch_size, 10 | any_concat, 11 | any_stack, 12 | any_ones_like, 13 | any_to_torch_tensor, 14 | ) 15 | 16 | 17 | def collate_fn( 18 | sample_list, 19 | with_matched_scene: bool, 20 | ctx_len: int = 5, 21 | ): 22 | """ 23 | sample_list: List of 24 | Tuple[main_data: Dict, Tuple[Tuple[real_pcds, real_pcd_ee_masks], Tuple[sim_pcds, sim_pcd_ee_masks]]] if with_matched_scene is True 25 | main_data: Dict, if with_matched_scene is False 26 | """ 27 | if with_matched_scene: 28 | main_data = [sample[0] for sample in sample_list] # List[Dict] 29 | matched_scenes = [ 30 | sample[1] for sample in sample_list 31 | ] # List[Tuple[Tuple[real_pcds, real_pcd_ee_masks], Tuple[sim_pcds, sim_pcd_ee_masks]]] 32 | else: 33 | main_data = sample_list 34 | matched_scenes = None 35 | 36 | L_max = max(get_batch_size(sample) for sample in main_data) 37 | N_chunks = ceil(L_max / ctx_len) 38 | L_pad_max = N_chunks * ctx_len 39 | 40 | sample_structure = deepcopy(any_slice(main_data[0], np.s_[0:1])) 41 | # pad to max length in this batch 42 | processed_main_data = any_stack( 43 | [ 44 | any_concat( 45 | [ 46 | sample, 47 | ] 48 | + [any_ones_like(sample_structure)] 49 | * (L_pad_max - get_batch_size(sample)), 50 | dim=0, 51 | ) 52 | for sample in main_data 53 | ], 54 | dim=0, 55 | ) # dict of (B, L_pad_max, ...) 56 | # construct mask 57 | mask = any_stack( 58 | [ 59 | any_concat( 60 | [ 61 | np.ones((get_batch_size(sample),), dtype=bool), 62 | np.zeros((L_pad_max - get_batch_size(sample),), dtype=bool), 63 | ] 64 | ) 65 | for sample in main_data 66 | ], 67 | dim=0, 68 | ) # (B, L_pad_max) 69 | 70 | # split into chunks 71 | processed_main_data = { 72 | k: any_stack(v, dim=0) 73 | for k, v in nested_np_split(processed_main_data, N_chunks, axis=1).items() 74 | } # dict of (N_chunks, B, ctx_len, ...) 
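# NOTE (annotation): worked shape example for the padding/chunking above (values are
# illustrative, not from the original source): with ctx_len=5 and a longest episode of
# L_max=12 steps, N_chunks = ceil(12 / 5) = 3 and L_pad_max = 15; an episode of length
# 8 is padded with 7 dummy steps, and its row of `mask` is True for the first 8 entries.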
75 | mask = any_stack(np.split(mask, N_chunks, axis=1), dim=0) # (N_chunks, B, ctx_len) 76 | processed_main_data["pad_mask"] = mask 77 | 78 | # convert to tensor 79 | processed_main_data = { 80 | k: any_to_torch_tensor(v) for k, v in processed_main_data.items() 81 | } 82 | if matched_scenes is not None: 83 | matched_scenes = any_stack(matched_scenes, dim=0) 84 | matched_scenes_tensor = ( 85 | ( 86 | any_to_torch_tensor(matched_scenes[0][0]), 87 | any_to_torch_tensor(matched_scenes[0][1]), 88 | ), 89 | ( 90 | any_to_torch_tensor(matched_scenes[1][0]), 91 | any_to_torch_tensor(matched_scenes[1][1]), 92 | ), 93 | ) 94 | return processed_main_data, matched_scenes_tensor 95 | return processed_main_data 96 | -------------------------------------------------------------------------------- /transic/distillation/data/data_module.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | from functools import partial 3 | 4 | from torch.utils.data import DataLoader 5 | from pytorch_lightning import LightningDataModule 6 | 7 | from transic.distillation.data.dummy import DummyDataset 8 | from transic.distillation.data.collate import collate_fn as _collate_fn 9 | from transic.distillation.data.dataset import ( 10 | DistillationDataset, 11 | DistillationSeqDataset, 12 | ) 13 | 14 | 15 | class DistillationDataModule(LightningDataModule): 16 | def __init__( 17 | self, 18 | *, 19 | data_path: str, 20 | matched_scene_data_path: Optional[str] = None, 21 | ctx_len: int = -1, # -1 means not using the SeqDataset at all 22 | skip_first_n_steps: int, 23 | sampled_pcd_points: int, 24 | refresh_pcd_sampling_idxs_interval: float, 25 | real_pcd_x_limits: Tuple[float, float], 26 | real_pcd_y_limits: Tuple[float, float], 27 | real_pcd_z_min: float, 28 | batch_size: int, 29 | dataloader_num_workers: int, 30 | seed: Optional[int] = None, 31 | ): 32 | super().__init__() 33 | self._data_path = data_path 34 | self._matched_scene_data_path = matched_scene_data_path 35 | self._skip_first_n_steps = skip_first_n_steps 36 | self._sampled_pcd_points = sampled_pcd_points 37 | self._refresh_pcd_sampling_idxs_interval = refresh_pcd_sampling_idxs_interval 38 | self._real_pcd_x_limits = real_pcd_x_limits 39 | self._real_pcd_y_limits = real_pcd_y_limits 40 | self._real_pcd_z_min = real_pcd_z_min 41 | 42 | self._batch_size = batch_size 43 | self._dataloader_num_workers = dataloader_num_workers 44 | self._seed = seed 45 | 46 | self._ds_cls = DistillationSeqDataset if ctx_len != -1 else DistillationDataset 47 | self._collate_fn = ( 48 | partial( 49 | _collate_fn, 50 | with_matched_scene=matched_scene_data_path is not None, 51 | ctx_len=ctx_len, 52 | ) 53 | if ctx_len != -1 54 | else None 55 | ) 56 | self._train_dataset = None 57 | 58 | def setup(self, stage: str) -> None: 59 | if stage == "fit" or stage is None: 60 | self._train_dataset = self._ds_cls( 61 | fpath=self._data_path, 62 | matched_scene_fpath=self._matched_scene_data_path, 63 | sampled_pcd_points=self._sampled_pcd_points, 64 | skip_first_n_steps=self._skip_first_n_steps, 65 | refresh_pcd_sampling_idxs_interval=self._refresh_pcd_sampling_idxs_interval, 66 | real_pcd_x_limits=self._real_pcd_x_limits, 67 | real_pcd_y_limits=self._real_pcd_y_limits, 68 | real_pcd_z_min=self._real_pcd_z_min, 69 | seed=self._seed, 70 | ) 71 | 72 | def train_dataloader(self): 73 | return DataLoader( 74 | self._train_dataset, 75 | batch_size=self._batch_size, 76 | num_workers=min(self._batch_size, self._dataloader_num_workers), 77 | 
pin_memory=True, 78 | persistent_workers=True, 79 | collate_fn=self._collate_fn, 80 | ) 81 | 82 | def val_dataloader(self): 83 | return DummyDataset(batch_size=1).get_dataloader() 84 | -------------------------------------------------------------------------------- /transic/distillation/data/dummy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data import Dataset, DataLoader 3 | 4 | 5 | class DummyDataset(Dataset): 6 | """ 7 | For test_step(), simply returns None N times. 8 | test_step() can have arbitrary logic 9 | """ 10 | 11 | def __init__(self, batch_size, epoch_len=1): 12 | """ 13 | Still set batch_size because pytorch_lightning tracks it 14 | """ 15 | self.n = epoch_len 16 | self._batch_size = batch_size 17 | 18 | def __len__(self): 19 | return self.n 20 | 21 | def __getitem__(self, i): 22 | return np.zeros((self._batch_size,), dtype=bool) 23 | 24 | def get_dataloader(self) -> DataLoader: 25 | """ 26 | Our dataset directly returns batched tensors instead of single samples, 27 | so for DataLoader we don't need a real collate_fn and set batch_size=1 28 | """ 29 | return DataLoader( 30 | self, 31 | batch_size=1, 32 | num_workers=0, 33 | pin_memory=True, 34 | shuffle=False, 35 | collate_fn=_singleton_collate_fn, 36 | ) 37 | 38 | 39 | def _singleton_collate_fn(tensor_list): 40 | """ 41 | Our dataset directly returns batched tensors instead of single samples, 42 | so for DataLoader we don't need a real collate_fn. 43 | """ 44 | assert len(tensor_list) == 1, "INTERNAL: collate_fn only allows a single item" 45 | return tensor_list[0] 46 | -------------------------------------------------------------------------------- /transic/distillation/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from .pointnet_policy import PointNetPolicy 2 | from .rnn_pointnet_policy import RNNPointNetPolicy 3 | -------------------------------------------------------------------------------- /transic/distillation/policy/pointnet_policy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from transic.learn.policy import GMMHead, BasePolicy 4 | from transic.nn.features import SimpleFeatureFusion, PointNet, Identity 5 | 6 | 7 | class PointNetPolicy(BasePolicy): 8 | def __init__( 9 | self, 10 | *, 11 | point_channels: int = 3, 12 | subtract_point_mean: bool = False, 13 | add_ee_embd: bool = False, 14 | ee_embd_dim: int, 15 | pointnet_output_dim: int, 16 | pointnet_hidden_dim: int, 17 | pointnet_hidden_depth: int, 18 | pointnet_activation: str = "gelu", 19 | prop_input_dim: int, 20 | feature_fusion_hidden_depth: int = 1, 21 | feature_fusion_hidden_dim: int = 256, 22 | feature_fusion_output_dim: int = 256, 23 | feature_fusion_activation: str = "relu", 24 | feature_fusion_add_input_activation: bool = False, 25 | feature_fusion_add_output_activation: bool = False, 26 | action_dim: int, 27 | action_net_gmm_n_modes: int = 5, 28 | action_net_hidden_dim: int, 29 | action_net_hidden_depth: int, 30 | action_net_activation: str = "relu", 31 | deterministic_inference: bool = True, 32 | gmm_low_noise_eval: bool = True, 33 | ): 34 | super().__init__() 35 | 36 | self.feature_extractor = SimpleFeatureFusion( 37 | extractors={ 38 | "pcd": PointNet( 39 | n_coordinates=point_channels, 40 | add_ee_embd=add_ee_embd, 41 | ee_embd_dim=ee_embd_dim, 42 | output_dim=pointnet_output_dim, 43 | hidden_dim=pointnet_hidden_dim, 44 | 
hidden_depth=pointnet_hidden_depth, 45 | activation=pointnet_activation, 46 | subtract_mean=subtract_point_mean, 47 | ), 48 | "proprioception": Identity(prop_input_dim), 49 | }, 50 | hidden_depth=feature_fusion_hidden_depth, 51 | hidden_dim=feature_fusion_hidden_dim, 52 | output_dim=feature_fusion_output_dim, 53 | activation=feature_fusion_activation, 54 | add_input_activation=feature_fusion_add_input_activation, 55 | add_output_activation=feature_fusion_add_output_activation, 56 | ) 57 | 58 | self.action_net = GMMHead( 59 | feature_fusion_output_dim, 60 | n_modes=action_net_gmm_n_modes, 61 | action_dim=action_dim, 62 | hidden_dim=action_net_hidden_dim, 63 | hidden_depth=action_net_hidden_depth, 64 | activation=action_net_activation, 65 | low_noise_eval=gmm_low_noise_eval, 66 | ) 67 | self._deterministic_inference = deterministic_inference 68 | 69 | def forward(self, obs): 70 | return self.action_net(self.feature_extractor(obs)) 71 | 72 | @torch.no_grad() 73 | def act(self, obs, deterministic=None): 74 | dist = self.forward(obs) 75 | if deterministic is None: 76 | deterministic = self._deterministic_inference 77 | if deterministic: 78 | return dist.mode() 79 | else: 80 | return dist.sample() 81 | -------------------------------------------------------------------------------- /transic/distillation/policy/rnn_pointnet_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Optional 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from transic.learn.policy import GMMHead, BasePolicy 7 | from transic.nn.features import SimpleFeatureFusion, PointNet, Identity 8 | from transic.utils.array import get_batch_size, any_slice 9 | 10 | 11 | RNN_CLS = { 12 | "lstm": nn.LSTM, 13 | "gru": nn.GRU, 14 | } 15 | 16 | 17 | class RNNPointNetPolicy(BasePolicy): 18 | is_sequence_policy = True 19 | 20 | def __init__( 21 | self, 22 | *, 23 | point_channels: int = 3, 24 | subtract_point_mean: bool = False, 25 | add_ee_embd: bool = False, 26 | ee_embd_dim: int, 27 | pointnet_output_dim: int, 28 | pointnet_hidden_dim: int, 29 | pointnet_hidden_depth: int, 30 | pointnet_activation: str = "gelu", 31 | prop_input_dim: int, 32 | feature_fusion_hidden_depth: int = 1, 33 | feature_fusion_hidden_dim: int = 256, 34 | feature_fusion_output_dim: int = 256, 35 | feature_fusion_activation: str = "relu", 36 | feature_fusion_add_input_activation: bool = False, 37 | feature_fusion_add_output_activation: bool = False, 38 | rnn_type: Literal["lstm", "gru"], 39 | rnn_n_layers: int = 2, 40 | rnn_hidden_dim: int = 256, 41 | ctx_len: int, 42 | action_dim: int, 43 | action_net_gmm_n_modes: int = 5, 44 | action_net_hidden_dim: int, 45 | action_net_hidden_depth: int, 46 | action_net_activation: str = "relu", 47 | deterministic_inference: bool = True, 48 | gmm_low_noise_eval: bool = True, 49 | ): 50 | super().__init__() 51 | 52 | self.ctx_len = ctx_len 53 | 54 | self.feature_extractor = SimpleFeatureFusion( 55 | extractors={ 56 | "pcd": PointNet( 57 | n_coordinates=point_channels, 58 | add_ee_embd=add_ee_embd, 59 | ee_embd_dim=ee_embd_dim, 60 | output_dim=pointnet_output_dim, 61 | hidden_dim=pointnet_hidden_dim, 62 | hidden_depth=pointnet_hidden_depth, 63 | activation=pointnet_activation, 64 | subtract_mean=subtract_point_mean, 65 | ), 66 | "proprioception": Identity(prop_input_dim), 67 | }, 68 | hidden_depth=feature_fusion_hidden_depth, 69 | hidden_dim=feature_fusion_hidden_dim, 70 | output_dim=feature_fusion_output_dim, 71 | activation=feature_fusion_activation, 72 
| add_input_activation=feature_fusion_add_input_activation, 73 | add_output_activation=feature_fusion_add_output_activation, 74 | ) 75 | 76 | assert rnn_type in ["lstm", "gru"] 77 | rnn_cls = RNN_CLS[rnn_type] 78 | self.rnn = rnn_cls( 79 | input_size=feature_fusion_output_dim, 80 | hidden_size=rnn_hidden_dim, 81 | num_layers=rnn_n_layers, 82 | batch_first=True, 83 | ) 84 | 85 | self.action_net = GMMHead( 86 | rnn_hidden_dim, 87 | n_modes=action_net_gmm_n_modes, 88 | action_dim=action_dim, 89 | hidden_dim=action_net_hidden_dim, 90 | hidden_depth=action_net_hidden_depth, 91 | activation=action_net_activation, 92 | low_noise_eval=gmm_low_noise_eval, 93 | ) 94 | self._deterministic_inference = deterministic_inference 95 | 96 | def get_initial_state(self, batch_size: int, timesteps: Optional[int] = None): 97 | h_0 = torch.zeros( 98 | self.rnn.num_layers, batch_size, self.rnn.hidden_size, device=self.device 99 | ) 100 | if isinstance(self.rnn, nn.LSTM): 101 | c_0 = torch.zeros_like(h_0) 102 | return h_0, c_0 103 | return h_0 104 | 105 | def update_state(self, *, old_state, new_state, idxs): 106 | if isinstance(self.rnn, nn.LSTM): 107 | h_old, c_old = old_state 108 | h_new, c_new = new_state 109 | h_old[:, idxs] = h_new 110 | c_old[:, idxs] = c_new 111 | return h_old, c_old 112 | elif isinstance(self.rnn, nn.GRU): 113 | old_state[:, idxs] = new_state 114 | return old_state 115 | else: 116 | raise NotImplementedError(f"Unknown RNN type {type(self.rnn)}") 117 | 118 | def forward(self, obs, policy_state): 119 | """ 120 | obs: dict of (B, L, ...) 121 | rnn_state: (h_0, c_0) or h_0 122 | """ 123 | x = self.feature_extractor(obs) 124 | x, policy_state = self.rnn(x, policy_state) 125 | return self.action_net(x), policy_state 126 | 127 | @torch.no_grad() 128 | def act(self, obs, policy_state, deterministic=None): 129 | """ 130 | obs: dict of (B, L=1, ...) 
131 | rnn_state: (h_0, c_0) or h_0 132 | """ 133 | assert get_batch_size(any_slice(obs, 0), strict=True) == 1, "Use L=1 for act" 134 | dist, policy_state = self.forward(obs, policy_state) 135 | if deterministic is None: 136 | deterministic = self._deterministic_inference 137 | if deterministic: 138 | action = dist.mode() 139 | else: 140 | action = dist.sample() 141 | # action is (B, L=1, A), reduce to (B, A) 142 | action = action[:, 0] 143 | return action, policy_state 144 | -------------------------------------------------------------------------------- /transic/learn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/learn/__init__.py -------------------------------------------------------------------------------- /transic/learn/lightning.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import os 3 | import time 4 | from copy import deepcopy 5 | import pprint 6 | 7 | import sys 8 | from omegaconf import DictConfig, OmegaConf, ListConfig 9 | import pytorch_lightning as pl 10 | import pytorch_lightning.loggers as pl_loggers 11 | from pytorch_lightning.callbacks import ( 12 | Callback, 13 | ModelCheckpoint, 14 | ProgressBar, 15 | TQDMProgressBar, 16 | ) 17 | from pytorch_lightning.utilities import rank_zero_only 18 | from hydra.utils import instantiate 19 | 20 | 21 | class LightingTrainer: 22 | def __init__(self, cfg: DictConfig): 23 | cfg = deepcopy(cfg) 24 | OmegaConf.set_struct(cfg, False) 25 | self.cfg = cfg 26 | self.run_command_args = sys.argv[1:] 27 | run_name = self.generate_run_name(cfg) 28 | self.run_dir = os.path.join(cfg.exp_root_dir, run_name) 29 | rank_zero_print("Run name:", run_name, "\nExp dir:", self.run_dir) 30 | os.makedirs(self.run_dir, exist_ok=True) 31 | os.makedirs(os.path.join(self.run_dir, "tb"), exist_ok=True) 32 | os.makedirs(os.path.join(self.run_dir, "logs"), exist_ok=True) 33 | os.makedirs(os.path.join(self.run_dir, "ckpt"), exist_ok=True) 34 | OmegaConf.save(cfg, os.path.join(self.run_dir, "conf.yaml")) 35 | self.cfg = cfg 36 | self.run_name = run_name 37 | self.ckpt_cfg = cfg.trainer.pop("checkpoint") 38 | self.data_module = self.create_data_module(cfg) 39 | self._monkey_patch_add_info(self.data_module) 40 | self.trainer = self.create_trainer(cfg) 41 | self.module = self.create_module(cfg) 42 | self.module.data_module = self.data_module 43 | self._monkey_patch_add_info(self.module) 44 | 45 | def create_module(self, cfg): 46 | return instantiate(cfg.module, _recursive_=False) 47 | 48 | def create_data_module(self, cfg): 49 | return instantiate(cfg.data_module) 50 | 51 | def generate_run_name(self, cfg): 52 | return cfg.run_name + "_" + time.strftime("%Y%m%d-%H%M%S") 53 | 54 | def _monkey_patch_add_info(self, obj): 55 | """ 56 | Add useful info to module and data_module so they can access directly 57 | """ 58 | # our own info 59 | obj.run_config = self.cfg 60 | obj.run_name = self.run_name 61 | obj.run_command_args = self.run_command_args 62 | # add properties from trainer 63 | for attr in [ 64 | "global_rank", 65 | "local_rank", 66 | "world_size", 67 | "num_nodes", 68 | "num_processes", 69 | "node_rank", 70 | "num_gpus", 71 | "data_parallel_device_ids", 72 | ]: 73 | if hasattr(obj, attr): 74 | continue 75 | setattr( 76 | obj.__class__, 77 | attr, 78 | # force capture 'attr' 79 | property(lambda self, attr=attr: getattr(self.trainer, attr)), 80 | ) 
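    # A sketch of the effect of the patch above (illustrative only, not
    # executed): given `runner = LightingTrainer(cfg)`, both `runner.module`
    # and `runner.data_module` gain read-only properties such as
    #
    #   runner.module.global_rank       # -> runner.module.trainer.global_rank
    #   runner.data_module.world_size   # -> forwarded to the attached pl.Trainer
    #
    # The `attr=attr` default argument makes each lambda capture its own
    # attribute name instead of the shared loop variable.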
81 |
82 |     def create_loggers(self, cfg) -> List[pl.loggers.Logger]:
83 |         loggers = [
84 |             pl_loggers.TensorBoardLogger(self.run_dir, name="tb", version=""),
85 |             pl_loggers.CSVLogger(self.run_dir, name="logs", version=""),
86 |         ]
87 |         if cfg.use_wandb:
88 |             loggers.append(
89 |                 pl_loggers.WandbLogger(
90 |                     name=cfg.wandb_run_name, project=cfg.wandb_project, id=self.run_name
91 |                 )
92 |             )
93 |         return loggers
94 |
95 |     def create_callbacks(self) -> List[Callback]:
96 |         ModelCheckpoint.FILE_EXTENSION = ".pth"
97 |         callbacks = []
98 |         if isinstance(self.ckpt_cfg, DictConfig):
99 |             ckpt = ModelCheckpoint(
100 |                 dirpath=os.path.join(self.run_dir, "ckpt"), **self.ckpt_cfg
101 |             )
102 |             callbacks.append(ckpt)
103 |         else:
104 |             assert isinstance(self.ckpt_cfg, ListConfig)
105 |             for _cfg in self.ckpt_cfg:
106 |                 ckpt = ModelCheckpoint(
107 |                     dirpath=os.path.join(self.run_dir, "ckpt"), **_cfg
108 |                 )
109 |                 callbacks.append(ckpt)
110 |
111 |         if not any(isinstance(c, ProgressBar) for c in callbacks):
112 |             callbacks.append(TQDMProgressBar())
113 |         rank_zero_print(
114 |             "Lightning callbacks:", [c.__class__.__name__ for c in callbacks]
115 |         )
116 |         return callbacks
117 |
118 |     def create_trainer(self, cfg) -> pl.Trainer:
119 |         assert "trainer" in cfg
120 |         C = cfg.trainer
121 |         return instantiate(
122 |             C, logger=self.create_loggers(cfg), callbacks=self.create_callbacks()
123 |         )
124 |
125 |     @property
126 |     def tb_logger(self):
127 |         return self.trainer.loggers[0].experiment  # this wrapper has no `logger` itself; go through the pl.Trainer
128 |
129 |     def fit(self):
130 |         return self.trainer.fit(
131 |             self.module,
132 |             datamodule=self.data_module,
133 |             ckpt_path=None,
134 |         )
135 |
136 |
137 | def pprint_(*objs, **kwargs):
138 |     """
139 |     Use pprint to format the objects
140 |     """
141 |     print(
142 |         *[
143 |             pprint.pformat(obj, indent=2) if not isinstance(obj, str) else obj
144 |             for obj in objs
145 |         ],
146 |         **kwargs,
147 |     )
148 |
149 |
150 | @rank_zero_only
151 | def rank_zero_print(*msg, **kwargs):
152 |     pprint_(*msg, **kwargs)
153 |
-------------------------------------------------------------------------------- /transic/learn/lr_schedule.py: --------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import numpy as np
4 |
5 |
6 | def generate_cosine_schedule(
7 |     base_value,
8 |     final_value,
9 |     epochs,
10 |     steps_per_epoch,
11 |     warmup_epochs=0,
12 |     warmup_start_value=0,
13 | ) -> np.ndarray:
14 |     warmup_schedule = np.array([])
15 |     warmup_iters = int(warmup_epochs * steps_per_epoch)
16 |     if warmup_epochs > 0:
17 |         warmup_schedule = np.linspace(warmup_start_value, base_value, warmup_iters)
18 |
19 |     iters = np.arange(int(epochs * steps_per_epoch) - warmup_iters)
20 |     schedule = np.array(
21 |         [
22 |             final_value
23 |             + 0.5
24 |             * (base_value - final_value)
25 |             * (1 + math.cos(math.pi * i / (len(iters))))
26 |             for i in iters
27 |         ]
28 |     )
29 |     schedule = np.concatenate((warmup_schedule, schedule))
30 |     assert len(schedule) == int(epochs * steps_per_epoch)
31 |     return schedule
32 |
33 |
34 | class CosineScheduleFunction:
35 |     def __init__(
36 |         self,
37 |         base_value,
38 |         final_value,
39 |         epochs,
40 |         steps_per_epoch,
41 |         warmup_epochs=0,
42 |         warmup_start_value=0,
43 |     ):
44 |         """
45 |         Usage:
46 |             scheduler = torch.optim.lr_scheduler.LambdaLR(
47 |                 optimizer=optimizer, lr_lambda=CosineScheduleFunction(**kwargs)
48 |             )
49 |         or simply use CosineScheduler(**kwargs)
50 |
51 |         Args:
52 |             epochs: effective epochs for the cosine schedule, *including* warmup
53 |                 after these epochs, scheduler will output
`final_value` ever after 54 | """ 55 | assert warmup_epochs < epochs, f"{warmup_epochs=} must be < {epochs=}" 56 | self._effective_steps = int(epochs * steps_per_epoch) 57 | self.schedule = generate_cosine_schedule( 58 | base_value=base_value, 59 | final_value=final_value, 60 | epochs=epochs, 61 | steps_per_epoch=steps_per_epoch, 62 | warmup_epochs=warmup_epochs, 63 | warmup_start_value=warmup_start_value, 64 | ) 65 | assert self.schedule.shape == (self._effective_steps,) 66 | self._final_value = final_value 67 | self._steps_tensor = torch.tensor(0, dtype=torch.long) # for register buffer 68 | 69 | def register_buffer(self, module: torch.nn.Module, name="cosine_steps"): 70 | module.register_buffer(name, self._steps_tensor, persistent=True) 71 | 72 | def __call__(self, step): 73 | self._steps_tensor.copy_(torch.tensor(step)) 74 | if step >= self._effective_steps: 75 | val = self._final_value 76 | else: 77 | val = self.schedule[step] 78 | return val 79 | -------------------------------------------------------------------------------- /transic/learn/optimizer_group.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Union, List, Tuple 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from transic.utils.misc_utils import match_patterns 7 | 8 | FilterType = Union[ 9 | Callable[[str, torch.Tensor], bool], List[str], Tuple[str], str, None 10 | ] 11 | 12 | 13 | def default_optimizer_groups( 14 | model: nn.Module, 15 | weight_decay: float, 16 | lr_scale: float = 1.0, 17 | no_decay_filter: FilterType = None, 18 | exclude_filter: FilterType = None, 19 | ): 20 | """ 21 | lr_scale is only effective when using with enlight.learn.lr_schedule.LambdaLRWithScale 22 | 23 | Returns: 24 | [{'lr_scale': 1.0, 'weight_decay': weight_decay, 'params': decay_group}, 25 | {'lr_scale': 1.0, 'weight_decay': 0.0, 'params': no_decay_group}], 26 | list of all param_ids processed 27 | """ 28 | no_decay_filter = _transform_filter(no_decay_filter) 29 | exclude_filter = _transform_filter(exclude_filter) 30 | decay_group = [] 31 | no_decay_group = [] 32 | all_params_id = [] 33 | for n, p in model.named_parameters(): 34 | all_params_id.append(id(p)) 35 | if not p.requires_grad or exclude_filter(n, p): 36 | continue 37 | 38 | # no decay: all 1D parameters and model specific ones 39 | if p.ndim == 1 or no_decay_filter(n, p): 40 | no_decay_group.append(p) 41 | else: 42 | decay_group.append(p) 43 | return [ 44 | {"weight_decay": weight_decay, "params": decay_group, "lr_scale": lr_scale}, 45 | {"weight_decay": 0.0, "params": no_decay_group, "lr_scale": lr_scale}, 46 | ], all_params_id 47 | 48 | 49 | def _transform_filter(filter: FilterType): 50 | """ 51 | Filter can be: 52 | - None: always returns False 53 | - function(name, p) -> True to activate, False to deactivate 54 | - list of strings to match, can have wildcard 55 | """ 56 | if filter is None: 57 | return lambda name, p: False 58 | elif callable(filter): 59 | return filter 60 | elif isinstance(filter, (str, list, tuple)): 61 | if isinstance(filter, str): 62 | filter = [filter] 63 | return lambda name, p: match_patterns(name, include=filter) 64 | else: 65 | raise ValueError(f"Invalid filter: {filter}") 66 | -------------------------------------------------------------------------------- /transic/learn/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BasePolicy 2 | from .distributions import GMMHead, CategoricalNet 3 | 
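# Minimal usage sketch (illustrative only; `MyPolicy` and the dimensions are
# hypothetical and not part of this package):
#
#   from transic.learn.policy import BasePolicy, GMMHead
#
#   class MyPolicy(BasePolicy):
#       def __init__(self):
#           super().__init__()
#           self.head = GMMHead(64, n_modes=5, action_dim=7,
#                               hidden_dim=128, hidden_depth=2)
#
#       def forward(self, obs):
#           return self.head(obs)  # returns an action distribution
#
#       def act(self, obs, deterministic=True):
#           dist = self.forward(obs)
#           return dist.mode() if deterministic else dist.sample()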
-------------------------------------------------------------------------------- /transic/learn/policy/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from pytorch_lightning import LightningModule 4 | 5 | 6 | class BasePolicy(ABC, LightningModule): 7 | is_sequence_policy: bool = False 8 | 9 | @abstractmethod 10 | def forward(self, *args, **kwargs): 11 | """ 12 | Forward the NN. 13 | """ 14 | pass 15 | 16 | @abstractmethod 17 | def act(self, *args, **kwargs): 18 | """ 19 | Given obs, return action. 20 | """ 21 | pass 22 | -------------------------------------------------------------------------------- /transic/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/nn/__init__.py -------------------------------------------------------------------------------- /transic/nn/features/__init__.py: -------------------------------------------------------------------------------- 1 | from .embedding import Embedding 2 | from .pointcloud import * 3 | from .fusion import SimpleFeatureFusion 4 | from .identity import Identity 5 | -------------------------------------------------------------------------------- /transic/nn/features/embedding.py: -------------------------------------------------------------------------------- 1 | from torch.nn import Embedding as _Embedding 2 | 3 | 4 | class Embedding(_Embedding): 5 | def __init__(self, *args, **kwargs): 6 | super().__init__(*args, **kwargs) 7 | self.output_dim = self.embedding_dim 8 | -------------------------------------------------------------------------------- /transic/nn/features/fusion.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from transic.nn.mlp import build_mlp 7 | from transic.learn.optimizer_group import default_optimizer_groups 8 | 9 | 10 | class SimpleFeatureFusion(nn.Module): 11 | def __init__( 12 | self, 13 | extractors: dict[str, nn.Module], 14 | hidden_depth: int, 15 | hidden_dim: int, 16 | output_dim: int, 17 | activation, 18 | add_input_activation: bool, 19 | add_output_activation: bool, 20 | ): 21 | super().__init__() 22 | self._extractors = nn.ModuleDict(extractors) 23 | extractors_output_dim = sum(e.output_dim for e in extractors.values()) 24 | self.output_dim = output_dim 25 | self._head = build_mlp( 26 | input_dim=extractors_output_dim, 27 | hidden_dim=hidden_dim, 28 | output_dim=output_dim, 29 | hidden_depth=hidden_depth, 30 | activation=activation, 31 | weight_init="orthogonal", 32 | bias_init="zeros", 33 | norm_type=None, 34 | add_input_activation=add_input_activation, 35 | add_input_norm=False, 36 | add_output_activation=add_output_activation, 37 | add_output_norm=False, 38 | ) 39 | 40 | self._obs_groups = None 41 | self._obs_key_checked = False 42 | 43 | def _check_obs_key_match(self, obs: dict, strict: bool = False): 44 | if strict: 45 | assert set(self._extractors.keys()) == set(obs.keys()) 46 | elif set(self._extractors.keys()) != set(obs.keys()): 47 | print( 48 | f"[warning] obs key mismatch: {set(self._extractors.keys())} != {set(obs.keys())}" 49 | ) 50 | 51 | def forward(self, x): 52 | x = self._group_obs(x) 53 | if not self._obs_key_checked: 54 | self._check_obs_key_match(x, strict=False) 55 | self._obs_key_checked = True 56 | x = {k: v.forward(x[k]) 
for k, v in self._extractors.items()} 57 | x = torch.cat([x[k] for k in sorted(x.keys())], dim=-1) 58 | x = self._head(x) 59 | return x 60 | 61 | def _group_obs(self, obs): 62 | obs_keys = obs.keys() 63 | if self._obs_groups is None: 64 | # group by / 65 | obs_groups = {k.split("/")[0] for k in obs_keys} 66 | self._obs_groups = sorted(list(obs_groups)) 67 | obs_rtn = {} 68 | for g in self._obs_groups: 69 | is_subgroup = any(k.startswith(f"{g}/") for k in obs_keys) 70 | if is_subgroup: 71 | obs_rtn[g] = { 72 | k.split("/", 1)[1]: v 73 | for k, v in obs.items() 74 | if k.startswith(f"{g}/") 75 | } 76 | else: 77 | obs_rtn[g] = obs[g] 78 | return obs_rtn 79 | 80 | def get_optimizer_groups(self, weight_decay, lr_layer_decay, lr_scale=1.0): 81 | extractors_pgs, extractor_pids = [], [] 82 | for extractor in self._extractors.values(): 83 | pg, pid = extractor.get_optimizer_groups( 84 | weight_decay=weight_decay, 85 | lr_layer_decay=lr_layer_decay, 86 | lr_scale=lr_scale, 87 | ) 88 | extractors_pgs.extend(pg) 89 | extractor_pids.extend(pid) 90 | head_pg, head_pid = default_optimizer_groups( 91 | self, 92 | weight_decay=weight_decay, 93 | lr_scale=lr_scale, 94 | exclude_filter=lambda name, p: id(p) in extractor_pids, 95 | ) 96 | return extractors_pgs + head_pg, extractor_pids + head_pid 97 | -------------------------------------------------------------------------------- /transic/nn/features/identity.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Identity(nn.Module): 5 | def __init__( 6 | self, 7 | input_dim: int, 8 | ): 9 | super().__init__() 10 | self._output_dim = input_dim 11 | 12 | @property 13 | def output_dim(self): 14 | return self._output_dim 15 | 16 | def forward(self, x): 17 | return x 18 | 19 | def get_optimizer_groups(self, *args, **kwargs): 20 | return [], [] 21 | -------------------------------------------------------------------------------- /transic/nn/features/pointcloud/__init__.py: -------------------------------------------------------------------------------- 1 | from .pointnet import PointNet 2 | from .set_transformer import SetXFPCDEncoder 3 | -------------------------------------------------------------------------------- /transic/nn/features/pointcloud/pointnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from transic.nn.mlp import build_mlp 5 | from transic.learn.optimizer_group import default_optimizer_groups 6 | 7 | 8 | class _PointNetSimplified(nn.Module): 9 | def __init__( 10 | self, 11 | *, 12 | point_channels: int = 3, 13 | output_dim: int, 14 | hidden_dim: int, 15 | hidden_depth: int, 16 | activation: str = "gelu", 17 | ): 18 | super().__init__() 19 | self._mlp = build_mlp( 20 | input_dim=point_channels, 21 | hidden_dim=hidden_dim, 22 | output_dim=output_dim, 23 | hidden_depth=hidden_depth, 24 | activation=activation, 25 | ) 26 | self.output_dim = output_dim 27 | 28 | def forward(self, x): 29 | """ 30 | x: (..., points, point_channels) 31 | """ 32 | x = self._mlp(x) # (..., points, output_dim) 33 | x = torch.max(x, dim=-2)[0] # (..., output_dim) 34 | return x 35 | 36 | 37 | class PointNet(nn.Module): 38 | def __init__( 39 | self, 40 | *, 41 | n_coordinates: int = 3, 42 | add_ee_embd: bool = False, 43 | ee_embd_dim: int = 128, 44 | output_dim: int = 512, 45 | hidden_dim: int = 512, 46 | hidden_depth: int = 2, 47 | activation: str = "gelu", 48 | subtract_mean: bool = False, 49 | ): 50 | 
super().__init__()
51 |         pn_in_channels = n_coordinates
52 |         if add_ee_embd:
53 |             pn_in_channels += ee_embd_dim
54 |         if subtract_mean:
55 |             pn_in_channels += n_coordinates
56 |         self.pointnet = _PointNetSimplified(
57 |             point_channels=pn_in_channels,
58 |             output_dim=output_dim,
59 |             hidden_dim=hidden_dim,
60 |             hidden_depth=hidden_depth,
61 |             activation=activation,
62 |         )
63 |         self.ee_embd_layer = None
64 |         if add_ee_embd:
65 |             self.ee_embd_layer = nn.Embedding(2, embedding_dim=ee_embd_dim)
66 |         self.add_ee_embd = add_ee_embd
67 |         self.subtract_mean = subtract_mean
68 |         self.output_dim = self.pointnet.output_dim
69 |
70 |     def forward(self, x):
71 |         """
72 |         x["coordinate"]: (..., points, coordinates)
73 |         """
74 |         point = x["coordinate"]
75 |         ee_mask = x.get("ee_mask", None)
76 |         if self.subtract_mean:
77 |             mean = torch.mean(point, dim=-2, keepdim=True)  # (..., 1, coordinates)
78 |             mean = torch.broadcast_to(mean, point.shape)  # (..., points, coordinates)
79 |             point = point - mean
80 |             point = torch.cat([point, mean], dim=-1)  # (..., points, 2 * coordinates)
81 |         if self.add_ee_embd:
82 |             ee_mask = ee_mask.to(dtype=torch.long)  # (..., points); .to() keeps the device, unlike torch.tensor()
83 |             ee_embd = self.ee_embd_layer(ee_mask)  # (..., points, ee_embd_dim)
84 |             point = torch.concat(
85 |                 [point, ee_embd], dim=-1
86 |             )  # (..., points, coordinates + ee_embd_dim)
87 |         return self.pointnet(point)  # pass the processed tensor, not the input dict
88 |
89 |     def get_optimizer_groups(self, weight_decay, lr_layer_decay, lr_scale=1.0):
90 |         pg, pids = default_optimizer_groups(
91 |             self,
92 |             weight_decay=weight_decay,
93 |             lr_scale=lr_scale,
94 |             no_decay_filter=["ee_embd_layer.*"],
95 |         )
96 |         return pg, pids
97 |
-------------------------------------------------------------------------------- /transic/nn/features/pointcloud/set_transformer/__init__.py: --------------------------------------------------------------------------------
1 | from .set_xf_pcd_encoder import SetXFPCDEncoder
2 |
-------------------------------------------------------------------------------- /transic/nn/features/pointcloud/set_transformer/set_transformer.py: --------------------------------------------------------------------------------
1 | """
2 | https://github.com/juho-lee/set_transformer
3 | Paper: Set Transformer: A Framework for Attention-based Permutation-Invariant Neural Networks
4 | """
5 | from __future__ import annotations
6 | from typing import Literal
7 |
8 | import torch
9 | import torch.nn as nn
10 | import math
11 | from einops import rearrange
12 |
13 |
14 | __all__ = [
15 |     "SetAttention",
16 |     "SelfSetAttention",
17 |     "InducedSetAttention",
18 |     "PoolingSetAttention",
19 |     "IdentityKeyValuePoolingAttention",
20 | ]
21 |
22 |
23 | class SetAttention(nn.Module):
24 |     """
25 |     "MAB" in the original paper
26 |     """
27 |
28 |     def __init__(
29 |         self,
30 |         dim_Q,
31 |         dim_K,
32 |         dim_V,
33 |         num_heads,
34 |         layer_norm=False,
35 |     ):
36 |         """
37 |         Args:
38 |             dim_Q, dim_K, dim_V: query, key/value, and output dims; dim_V must be divisible by num_heads.
39 |             layer_norm: if True, apply LayerNorm after the attention and feed-forward residuals. For pooling that keeps K untransformed, see IdentityKeyValuePoolingAttention below.
40 | See CoCa paper: https://arxiv.org/abs/2205.01917 41 | """ 42 | super().__init__() 43 | self.dim_V = dim_V 44 | self.num_heads = num_heads 45 | assert self.dim_V % self.num_heads == 0 46 | self.fc_q = nn.Linear(dim_Q, dim_V) 47 | self.fc_k = nn.Linear(dim_K, dim_V) 48 | self.fc_v = nn.Linear(dim_K, dim_V) 49 | if layer_norm: 50 | self.ln0 = nn.LayerNorm(dim_V) 51 | self.ln1 = nn.LayerNorm(dim_V) 52 | else: 53 | self.ln0 = nn.Identity() 54 | self.ln1 = nn.Identity() 55 | self.fc_o = nn.Linear(dim_V, dim_V) 56 | self.act = nn.ReLU(inplace=True) 57 | 58 | def forward(self, Q, K, mask=None): 59 | """ 60 | mask: if not none, should be (B, L_src, L_trg) 61 | """ 62 | if mask is not None: 63 | assert mask.shape[0] == Q.shape[0] 64 | assert mask.shape[1] == Q.shape[1] 65 | assert mask.shape[2] == K.shape[1] 66 | # check valid mask 67 | assert mask.dtype == torch.bool 68 | assert torch.all( 69 | mask.sum(dim=2) > 0 70 | ), "each source token should attend to at least one target token" 71 | # repeat mask num_heads times 72 | mask = torch.cat([mask] * self.num_heads, 0) 73 | Q = self.fc_q(Q) 74 | K, V = self.fc_k(K), self.fc_v(K) 75 | 76 | dim_split = self.dim_V // self.num_heads 77 | Q_ = torch.cat(Q.split(dim_split, 2), 0) 78 | K_ = torch.cat(K.split(dim_split, 2), 0) 79 | V_ = torch.cat(V.split(dim_split, 2), 0) 80 | 81 | A = Q_.bmm(K_.transpose(1, 2)) / math.sqrt(self.dim_V) 82 | if mask is not None: 83 | A.masked_fill_(mask == 0, -float("inf")) 84 | A = torch.softmax(A, 2) 85 | O = torch.cat((Q_ + A.bmm(V_)).split(Q.size(0), 0), 2) 86 | O = self.ln0(O) 87 | O = O + self.act(self.fc_o(O)) 88 | O = self.ln1(O) 89 | return O 90 | 91 | 92 | class SelfSetAttention(SetAttention): 93 | """ 94 | "SAB" in the original paper 95 | """ 96 | 97 | def forward(self, X): 98 | return super().forward(X, X) 99 | 100 | 101 | class InducedSetAttention(nn.Module): 102 | """ 103 | "ISAB" in the original paper 104 | """ 105 | 106 | def __init__( 107 | self, 108 | dim_in, 109 | dim_out, 110 | num_heads, 111 | num_queries, 112 | layer_norm=False, 113 | ): 114 | super().__init__() 115 | self.I = nn.Parameter(torch.Tensor(1, num_queries, dim_out)) 116 | nn.init.xavier_uniform_(self.I) 117 | self.mab0 = SetAttention( 118 | dim_Q=dim_out, 119 | dim_K=dim_in, 120 | dim_V=dim_out, 121 | num_heads=num_heads, 122 | layer_norm=layer_norm, 123 | ) 124 | self.mab1 = SetAttention( 125 | dim_Q=dim_in, 126 | dim_K=dim_out, 127 | dim_V=dim_out, 128 | num_heads=num_heads, 129 | layer_norm=layer_norm, 130 | ) 131 | 132 | def forward(self, X): 133 | H = self.mab0(self.I.repeat(X.size(0), 1, 1), X) 134 | return self.mab1(X, H) 135 | 136 | 137 | class PoolingSetAttention(nn.Module): 138 | """ 139 | "PMA" in the original paper 140 | """ 141 | 142 | def __init__( 143 | self, 144 | dim, 145 | num_heads, 146 | num_queries, 147 | pool_type: Literal["avg", "concat", "none", None] = None, 148 | layer_norm=False, 149 | ): 150 | """ 151 | Args: 152 | num_queries: pools the original set into `num_queries` features 153 | pool_type: 'avg', 'concat', or None 154 | - 'avg': average pooling, returns [B, dim] 155 | - 'max': max pooling, returns [B, dim] 156 | - 'concat': concatenate the pooled features, returns [B, num_queries*dim] 157 | - None: don't pool and returns [B, num_queries, dim] 158 | """ 159 | super().__init__() 160 | assert pool_type in ["avg", "concat", "none", "max", None] 161 | self._pool_type = pool_type 162 | self.S = nn.Parameter(torch.Tensor(1, num_queries, dim)) 163 | nn.init.xavier_uniform_(self.S) 164 | self.mab = SetAttention( 
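            # PMA is just MAB(S, X): the learned seed S supplies num_queries
            # query vectors, the input set X supplies keys and values, so the
            # set is pooled into num_queries outputs of size dim.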
165 | dim, 166 | dim, 167 | dim, 168 | num_heads=num_heads, 169 | layer_norm=layer_norm, 170 | ) 171 | 172 | def forward(self, X, mask=None): 173 | O = self.mab(self.S.repeat(X.size(0), 1, 1), X, mask) 174 | if self._pool_type == "avg": 175 | return O.mean(dim=1) 176 | elif self._pool_type == "max": 177 | return O.max(dim=1)[0] 178 | elif self._pool_type == "concat": 179 | return rearrange(O, "b q d -> b (q d)") 180 | elif self._pool_type in ["none", None]: 181 | return O 182 | else: 183 | raise ValueError(f"Unknown pool_type: {self._pool_type}") 184 | 185 | 186 | class IdentityKeyValuePoolingAttention(nn.Module): 187 | """ 188 | The key/value are identity functions as the original features, and only 189 | the query (external inducing point) is learned. 190 | See CoCa paper: https://arxiv.org/abs/2205.01917 191 | """ 192 | 193 | def __init__(self, dim, num_heads, num_queries=1): 194 | """ 195 | Args: 196 | """ 197 | super().__init__() 198 | self.Q = nn.Parameter(torch.Tensor(1, num_queries, dim)) 199 | nn.init.xavier_uniform_(self.Q) 200 | self.dim = dim 201 | self.num_heads = num_heads 202 | assert self.dim % self.num_heads == 0 203 | self._extra_repr = dict(dim=dim, num_heads=num_heads, num_queries=num_queries) 204 | 205 | def forward(self, V): 206 | # V: [B, L, D], L is sequence length 207 | B, L, D = V.size() 208 | assert D == self.dim 209 | batch_size = V.size(0) 210 | Q = self.Q.repeat(batch_size, 1, 1) 211 | K = V # K and V are both identity functions from the original features 212 | 213 | dim_split = self.dim // self.num_heads 214 | Q_ = torch.cat(Q.split(dim_split, 2), 0) 215 | K_ = torch.cat(K.split(dim_split, 2), 0) 216 | V_ = torch.cat(V.split(dim_split, 2), 0) 217 | 218 | A = torch.softmax(Q_.bmm(K_.transpose(1, 2)) / math.sqrt(self.dim), 2) 219 | O = A.bmm(V_) 220 | O = rearrange(O, "(nh b) q d -> b q (nh d)", b=batch_size) 221 | return O.mean(1) # average over number of query vector features 222 | 223 | def extra_repr(self) -> str: 224 | return ", ".join(f"{k}={v}" for k, v in self._extra_repr.items()) 225 | -------------------------------------------------------------------------------- /transic/nn/features/pointcloud/set_transformer/set_xf_pcd_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from transic.nn.features.pointcloud.set_transformer.set_transformer import ( 5 | PoolingSetAttention, 6 | ) 7 | 8 | 9 | class SetXFPCDEncoder(nn.Module): 10 | def __init__( 11 | self, 12 | *, 13 | n_coordinates: int = 3, 14 | add_ee_embd: bool = False, 15 | ee_embd_dim: int = 128, 16 | hidden_dim: int = 512, 17 | subtract_mean: bool = False, 18 | set_xf_num_heads: int = 8, 19 | set_xf_num_queries: int = 8, 20 | set_xf_pool_type, 21 | set_xf_layer_norm: bool = False, 22 | ): 23 | super().__init__() 24 | pn_in_channels = n_coordinates 25 | if add_ee_embd: 26 | pn_in_channels += ee_embd_dim 27 | if subtract_mean: 28 | pn_in_channels += n_coordinates 29 | 30 | self.linear = nn.Linear(pn_in_channels, hidden_dim) 31 | self.num_queries = set_xf_num_queries 32 | self.set_xf = PoolingSetAttention( 33 | dim=hidden_dim, 34 | num_heads=set_xf_num_heads, 35 | num_queries=set_xf_num_queries, 36 | pool_type=set_xf_pool_type, 37 | layer_norm=set_xf_layer_norm, 38 | ) 39 | self.ee_embd_layer = None 40 | if add_ee_embd: 41 | self.ee_embd_layer = nn.Embedding(2, embedding_dim=ee_embd_dim) 42 | self.add_ee_embd = add_ee_embd 43 | self.subtract_mean = subtract_mean 44 | if set_xf_pool_type == "concat": 45 | 
self.output_dim = hidden_dim * set_xf_num_queries
46 |         else:
47 |             self.output_dim = hidden_dim
48 |
49 |     def forward(self, x):
50 |         """
51 |         x["coordinate"]: (..., points, coordinates)
52 |         x["ee_mask"]: (..., points) if present
53 |         x["pad_mask"]: (..., points) if present (for variable length point clouds)
54 |         """
55 |         point = x["coordinate"]
56 |         leading_dims = point.shape[:-2]
57 |         point = point.reshape(-1, *point.shape[-2:])
58 |         ee_mask = x.get("ee_mask", None)
59 |         if ee_mask is not None:
60 |             ee_mask = ee_mask.reshape(-1, *ee_mask.shape[-1:])
61 |         pad_mask = x.get("pad_mask", None)
62 |         if pad_mask is not None:
63 |             pad_mask = pad_mask.reshape(-1, *pad_mask.shape[-1:])
64 |             pad_mask = pad_mask.to(dtype=torch.bool)
65 |             pad_mask = pad_mask.unsqueeze(1)  # (..., 1, points)
66 |             pad_mask = pad_mask.repeat(
67 |                 1, self.num_queries, 1
68 |             )  # (..., num_queries, points)
69 |         if self.subtract_mean:
70 |             mean = torch.mean(point, dim=-2, keepdim=True)  # (..., 1, coordinates)
71 |             mean = torch.broadcast_to(mean, point.shape)  # (..., points, coordinates)
72 |             point = point - mean
73 |             point = torch.cat([point, mean], dim=-1)  # (..., points, 2 * coordinates)
74 |         if self.add_ee_embd:
75 |             ee_mask = ee_mask.to(dtype=torch.long)  # (..., points)
76 |             ee_embd = self.ee_embd_layer(ee_mask)  # (..., points, ee_embd_dim)
77 |             point = torch.concat(
78 |                 [point, ee_embd], dim=-1
79 |             )  # (..., points, coordinates + ee_embd_dim)
80 |         point = self.linear(point)  # (..., points, hidden_dim)
81 |         output = self.set_xf(point, mask=pad_mask)  # (..., self.output_dim)
82 |         # recover leading dimensions; shape[1:] also covers the unpooled (num_queries, dim) output
83 |         output = output.reshape(*leading_dims, *output.shape[1:])
84 |         return output
85 |
-------------------------------------------------------------------------------- /transic/nn/mlp.py: --------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import torch.nn as nn
4 | from typing import Callable, Literal
5 |
6 |
7 | def get_activation(activation: str | Callable | None) -> Callable:
8 |     if not activation:
9 |         return nn.Identity
10 |     elif callable(activation):
11 |         return activation
12 |     ACT_LAYER = {
13 |         "tanh": nn.Tanh,
14 |         "relu": lambda: nn.ReLU(inplace=True),
15 |         "leaky_relu": lambda: nn.LeakyReLU(inplace=True),
16 |         "swish": lambda: nn.SiLU(inplace=True),  # SiLU is alias for Swish
17 |         "sigmoid": nn.Sigmoid,
18 |         "elu": lambda: nn.ELU(inplace=True),
19 |         "gelu": nn.GELU,
20 |     }
21 |     activation = activation.lower()
22 |     assert activation in ACT_LAYER, f"Supported activations: {ACT_LAYER.keys()}"
23 |     return ACT_LAYER[activation]
24 |
25 |
26 | def get_initializer(method: str | Callable, activation: str) -> Callable:
27 |     if isinstance(method, str):
28 |         assert hasattr(
29 |             nn.init, f"{method}_"
30 |         ), f"Initializer nn.init.{method}_ does not exist"
31 |         if method == "orthogonal":
32 |             try:
33 |                 gain = nn.init.calculate_gain(activation)
34 |             except ValueError:
35 |                 gain = 1.0
36 |             return lambda x: nn.init.orthogonal_(x, gain=gain)
37 |         else:
38 |             return getattr(nn.init, f"{method}_")
39 |     else:
40 |         assert callable(method)
41 |         return method
42 |
43 |
44 | def build_mlp(
45 |     input_dim,
46 |     *,
47 |     hidden_dim: int,
48 |     output_dim: int,
49 |     hidden_depth: int = None,
50 |     num_layers: int = None,
51 |     activation: str | Callable = "relu",
52 |     weight_init: str | Callable = "orthogonal",
53 |     bias_init="zeros",
54 |     norm_type: Literal["batchnorm", "layernorm"] | None = None,
55 |     add_input_activation: bool | str | Callable = False,
56 |
add_input_norm: bool = False, 57 | add_output_activation: bool | str | Callable = False, 58 | add_output_norm: bool = False, 59 | ) -> nn.Sequential: 60 | """ 61 | In other popular RL implementations, tanh is typically used with orthogonal 62 | initialization, which may perform better than ReLU. 63 | 64 | Args: 65 | norm_type: None, "batchnorm", "layernorm", applied to intermediate layers 66 | add_input_activation: whether to add a nonlinearity to the input _before_ 67 | the MLP computation. This is useful for processing a feature from a preceding 68 | image encoder, for example. Image encoder typically has a linear layer 69 | at the end, and we don't want the MLP to immediately stack another linear 70 | layer on the input features. 71 | - True to add the same activation as the rest of the MLP 72 | - str to add an activation of a different type. 73 | add_input_norm: see `add_input_activation`, whether to add a normalization layer 74 | to the input _before_ the MLP computation. 75 | values: True to add the `norm_type` to the input 76 | add_output_activation: whether to add a nonlinearity to the output _after_ the 77 | MLP computation. 78 | - True to add the same activation as the rest of the MLP 79 | - str to add an activation of a different type. 80 | add_output_norm: see `add_output_activation`, whether to add a normalization layer 81 | _after_ the MLP computation. 82 | values: True to add the `norm_type` to the input 83 | """ 84 | assert (hidden_depth is None) != (num_layers is None), ( 85 | "Either hidden_depth or num_layers must be specified, but not both. " 86 | "num_layers is defined as hidden_depth+1" 87 | ) 88 | if hidden_depth is not None: 89 | assert hidden_depth >= 0 90 | if num_layers is not None: 91 | assert num_layers >= 1 92 | act_layer = get_activation(activation) 93 | 94 | weight_init = get_initializer(weight_init, activation) 95 | bias_init = get_initializer(bias_init, activation) 96 | 97 | if norm_type is not None: 98 | norm_type = norm_type.lower() 99 | 100 | if not norm_type: 101 | norm_type = nn.Identity 102 | elif norm_type == "batchnorm": 103 | norm_type = nn.BatchNorm1d 104 | elif norm_type == "layernorm": 105 | norm_type = nn.LayerNorm 106 | else: 107 | raise ValueError(f"Unsupported norm layer: {norm_type}") 108 | 109 | hidden_depth = num_layers - 1 if hidden_depth is None else hidden_depth 110 | if hidden_depth == 0: 111 | mods = [nn.Linear(input_dim, output_dim)] 112 | else: 113 | mods = [nn.Linear(input_dim, hidden_dim), norm_type(hidden_dim), act_layer()] 114 | for i in range(hidden_depth - 1): 115 | mods += [ 116 | nn.Linear(hidden_dim, hidden_dim), 117 | norm_type(hidden_dim), 118 | act_layer(), 119 | ] 120 | mods.append(nn.Linear(hidden_dim, output_dim)) 121 | 122 | if add_input_norm: 123 | mods = [norm_type(input_dim)] + mods 124 | if add_input_activation: 125 | if add_input_activation is not True: 126 | act_layer = get_activation(add_input_activation) 127 | mods = [act_layer()] + mods 128 | if add_output_norm: 129 | mods.append(norm_type(output_dim)) 130 | if add_output_activation: 131 | if add_output_activation is not True: 132 | act_layer = get_activation(add_output_activation) 133 | mods.append(act_layer()) 134 | 135 | for mod in mods: 136 | if isinstance(mod, nn.Linear): 137 | weight_init(mod.weight) 138 | bias_init(mod.bias) 139 | 140 | return nn.Sequential(*mods) 141 | 142 | 143 | class MLP(nn.Module): 144 | def __init__( 145 | self, 146 | input_dim, 147 | *, 148 | hidden_dim: int, 149 | output_dim: int, 150 | hidden_depth: int = None, 151 | 
num_layers: int = None, 152 | activation: str | Callable = "relu", 153 | weight_init: str | Callable = "orthogonal", 154 | bias_init="zeros", 155 | norm_type: Literal["batchnorm", "layernorm"] | None = None, 156 | add_input_activation: bool | str | Callable = False, 157 | add_input_norm: bool = False, 158 | add_output_activation: bool | str | Callable = False, 159 | add_output_norm: bool = False, 160 | ): 161 | super().__init__() 162 | # delegate to build_mlp by keywords 163 | self.layers = build_mlp( 164 | input_dim, 165 | hidden_dim=hidden_dim, 166 | output_dim=output_dim, 167 | hidden_depth=hidden_depth, 168 | num_layers=num_layers, 169 | activation=activation, 170 | weight_init=weight_init, 171 | bias_init=bias_init, 172 | norm_type=norm_type, 173 | add_input_activation=add_input_activation, 174 | add_input_norm=add_input_norm, 175 | add_output_activation=add_output_activation, 176 | add_output_norm=add_output_norm, 177 | ) 178 | # add attributes to the class 179 | self.input_dim = input_dim 180 | self.output_dim = output_dim 181 | self.hidden_depth = hidden_depth 182 | self.activation = activation 183 | self.weight_init = weight_init 184 | self.bias_init = bias_init 185 | self.norm_type = norm_type 186 | if add_input_activation is True: 187 | self.input_activation = activation 188 | else: 189 | self.input_activation = add_input_activation 190 | if add_input_norm is True: 191 | self.input_norm_type = norm_type 192 | else: 193 | self.input_norm_type = None 194 | # do the same for output activation and norm 195 | if add_output_activation is True: 196 | self.output_activation = activation 197 | else: 198 | self.output_activation = add_output_activation 199 | if add_output_norm is True: 200 | self.output_norm_type = norm_type 201 | else: 202 | self.output_norm_type = None 203 | 204 | def forward(self, x): 205 | return self.layers(x) 206 | -------------------------------------------------------------------------------- /transic/real_world/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/real_world/__init__.py -------------------------------------------------------------------------------- /transic/residual/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/residual/__init__.py -------------------------------------------------------------------------------- /transic/residual/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_module import ResidualDataModule 2 | -------------------------------------------------------------------------------- /transic/residual/data/collate.py: -------------------------------------------------------------------------------- 1 | from math import ceil 2 | from copy import deepcopy 3 | 4 | import numpy as np 5 | 6 | from transic.utils.array import ( 7 | get_batch_size, 8 | any_slice, 9 | any_stack, 10 | any_concat, 11 | any_ones_like, 12 | any_to_torch_tensor, 13 | nested_np_split, 14 | ) 15 | 16 | 17 | def collate_fn( 18 | sample_list, 19 | ctx_len: int = 5, 20 | ): 21 | """ 22 | sample_list: List of Dict[str, np.ndarray] 23 | """ 24 | L_max = max(get_batch_size(sample) for sample in sample_list) 25 | N_chunks = ceil(L_max / ctx_len) 26 | L_pad_max = N_chunks * ctx_len 27 | 28 | sample_structure = 
deepcopy(any_slice(sample_list[0], np.s_[0:1])) 29 | # pad to max length in this batch 30 | processed_main_data = any_stack( 31 | [ 32 | any_concat( 33 | [ 34 | sample, 35 | ] 36 | + [any_ones_like(sample_structure)] 37 | * (L_pad_max - get_batch_size(sample)), 38 | dim=0, 39 | ) 40 | for sample in sample_list 41 | ], 42 | dim=0, 43 | ) # dict of (B, L_pad_max, ...) 44 | # construct mask 45 | mask = any_stack( 46 | [ 47 | any_concat( 48 | [ 49 | np.ones((get_batch_size(sample),), dtype=bool), 50 | np.zeros((L_pad_max - get_batch_size(sample),), dtype=bool), 51 | ] 52 | ) 53 | for sample in sample_list 54 | ], 55 | dim=0, 56 | ) # (B, L_pad_max) 57 | 58 | # split into chunks 59 | processed_main_data = { 60 | k: any_stack(v, dim=0) 61 | for k, v in nested_np_split(processed_main_data, N_chunks, axis=1).items() 62 | } # dict of (N_chunks, B, ctx_len, ...) 63 | mask = any_stack(np.split(mask, N_chunks, axis=1), dim=0) # (N_chunks, B, ctx_len) 64 | processed_main_data["pad_mask"] = mask 65 | 66 | # convert to tensor 67 | processed_main_data = { 68 | k: any_to_torch_tensor(v) for k, v in processed_main_data.items() 69 | } 70 | return processed_main_data 71 | -------------------------------------------------------------------------------- /transic/residual/data/data_module.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Literal, List 2 | from functools import partial 3 | 4 | import torch 5 | from torch.utils.data import DataLoader 6 | from pytorch_lightning import LightningDataModule 7 | 8 | from transic.residual.data.dataset import ResidualDataset, ResidualSeqDataset 9 | from transic.residual.data.collate import collate_fn as _collate_fn 10 | 11 | 12 | class ResidualDataModule(LightningDataModule): 13 | def __init__( 14 | self, 15 | *, 16 | data_dir: str, 17 | variable_len_pcd_handle_strategy: Literal["pad", "truncate"], 18 | include_grasp_action: bool, 19 | gripper_close_width: float, 20 | gripper_open_width: float = 0.08, 21 | ctx_len: int = -1, # -1 means not using the SeqDataset at all 22 | seed: Optional[int] = None, 23 | batch_size: int, 24 | val_batch_size: Optional[int], 25 | train_portion: float = 0.9, 26 | dataloader_num_workers: int, 27 | ): 28 | super().__init__() 29 | 30 | self._data_dir = data_dir 31 | self._variable_len_pcd_handle_strategy = variable_len_pcd_handle_strategy 32 | self._include_grasp_action = include_grasp_action 33 | self._gripper_close_width = gripper_close_width 34 | self._gripper_open_width = gripper_open_width 35 | self._seed = seed 36 | self._bs = batch_size 37 | self._vbs = val_batch_size or batch_size 38 | self._train_portion = train_portion 39 | self._dataloader_num_workers = dataloader_num_workers 40 | 41 | self._ds_cls = ResidualSeqDataset if ctx_len != -1 else ResidualDataset 42 | self._collate_fn = ( 43 | partial(_collate_fn, ctx_len=ctx_len) if ctx_len != -1 else None 44 | ) 45 | 46 | self._train_dataset, self._val_dataset = None, None 47 | self._P_intervention = None 48 | 49 | @property 50 | def P_intervention(self): 51 | assert self._P_intervention is not None, "Call setup() first" 52 | return self._P_intervention 53 | 54 | def setup(self, stage: str) -> None: 55 | if stage == "fit" or stage is None: 56 | ds = self._ds_cls( 57 | data_dir=self._data_dir, 58 | variable_len_pcd_handle_strategy=self._variable_len_pcd_handle_strategy, 59 | include_grasp_action=self._include_grasp_action, 60 | gripper_close_width=self._gripper_close_width, 61 | 
gripper_open_width=self._gripper_open_width, 62 | seed=self._seed, 63 | ) 64 | self._P_intervention = ds.P_intervention 65 | self._train_dataset, self._val_dataset = _sequential_split_dataset( 66 | ds, split_portions=[self._train_portion, 1 - self._train_portion] 67 | ) 68 | 69 | def train_dataloader(self): 70 | return DataLoader( 71 | self._train_dataset, 72 | batch_size=self._bs, 73 | num_workers=min(self._bs, self._dataloader_num_workers), 74 | pin_memory=True, 75 | persistent_workers=True, 76 | collate_fn=self._collate_fn, 77 | ) 78 | 79 | def val_dataloader(self): 80 | return DataLoader( 81 | self._val_dataset, 82 | batch_size=self._vbs, 83 | num_workers=min(self._vbs, self._dataloader_num_workers), 84 | pin_memory=True, 85 | persistent_workers=True, 86 | collate_fn=self._collate_fn, 87 | ) 88 | 89 | 90 | def _accumulate(iterable, fn=lambda x, y: x + y): 91 | """ 92 | Return running totals 93 | # _accumulate([1,2,3,4,5]) --> 1 3 6 10 15 94 | # _accumulate([1,2,3,4,5], operator.mul) --> 1 2 6 24 120 95 | """ 96 | it = iter(iterable) 97 | try: 98 | total = next(it) 99 | except StopIteration: 100 | return 101 | yield total 102 | for element in it: 103 | total = fn(total, element) 104 | yield total 105 | 106 | 107 | def _sequential_split_dataset( 108 | dataset: torch.utils.data.Dataset, split_portions: List[float] 109 | ): 110 | """ 111 | Split a dataset into multiple datasets, each with a different portion of the 112 | original dataset. Uses torch.utils.data.Subset. 113 | """ 114 | assert len(split_portions) > 0, "split_portions must be a non-empty list" 115 | assert all(0.0 <= p <= 1.0 for p in split_portions), f"{split_portions=}" 116 | assert abs(sum(split_portions) - 1.0) < 1e-6, f"{sum(split_portions)=} != 1.0" 117 | L = len(dataset) 118 | assert L > 0, "dataset must be non-empty" 119 | # split the list with proportions 120 | lengths = [int(p * L) for p in split_portions] 121 | # make sure the last split fills the full dataset 122 | lengths[-1] += L - sum(lengths) 123 | indices = list(range(L)) 124 | 125 | return [ 126 | torch.utils.data.Subset(dataset, indices[offset - length : offset]) 127 | for offset, length in zip(_accumulate(lengths), lengths) 128 | ] 129 | -------------------------------------------------------------------------------- /transic/residual/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from .perceiver_residual_policy import PerceiverResidualPolicy 2 | from .pointnet_residual_policy import PointNetResidualPolicy 3 | -------------------------------------------------------------------------------- /transic/residual/policy/perceiver_residual_policy.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | from transic.learn.policy import GMMHead, CategoricalNet 6 | from transic.learn.policy import BasePolicy 7 | from transic.nn.features import ( 8 | SimpleFeatureFusion, 9 | SetXFPCDEncoder, 10 | Identity, 11 | Embedding, 12 | ) 13 | from transic.utils.torch_utils import load_state_dict, freeze_params 14 | 15 | 16 | class PerceiverResidualPolicy(BasePolicy): 17 | def __init__( 18 | self, 19 | *, 20 | point_channels: int = 3, 21 | subtract_point_mean: bool = False, 22 | add_ee_embd: bool = False, 23 | ee_embd_dim: int, 24 | set_xf_hidden_dim: int, 25 | set_xf_num_heads: int, 26 | set_xf_num_queries: int, 27 | set_xf_pool_type: str, 28 | set_xf_layer_norm: bool, 29 | prop_input_dim: int, 30 | robot_policy_output_dim: int, 31 | 
include_robot_policy_gripper_action_input: bool, 32 | robot_policy_gripper_action_embd_dim: int, 33 | feature_fusion_hidden_depth: int = 1, 34 | feature_fusion_hidden_dim: int = 256, 35 | feature_fusion_output_dim: int = 256, 36 | feature_fusion_activation: str = "relu", 37 | feature_fusion_add_input_activation: bool = False, 38 | feature_fusion_add_output_activation: bool = False, 39 | action_dim: int, 40 | action_net_gmm_n_modes: int = 5, 41 | action_net_hidden_dim: int, 42 | action_net_hidden_depth: int, 43 | action_net_activation: str = "relu", 44 | intervention_head_hidden_dim: int, 45 | intervention_head_hidden_depth: int, 46 | intervention_head_activation: str = "relu", 47 | deterministic_inference: bool = True, 48 | gmm_low_noise_eval: bool = True, 49 | update_intervention_head_only: bool = False, 50 | ckpt_path_if_update_intervention_head_only: str = None, 51 | ): 52 | super().__init__() 53 | 54 | extractors = { 55 | "pcd": SetXFPCDEncoder( 56 | n_coordinates=point_channels, 57 | add_ee_embd=add_ee_embd, 58 | ee_embd_dim=ee_embd_dim, 59 | hidden_dim=set_xf_hidden_dim, 60 | subtract_mean=subtract_point_mean, 61 | set_xf_num_heads=set_xf_num_heads, 62 | set_xf_num_queries=set_xf_num_queries, 63 | set_xf_pool_type=set_xf_pool_type, 64 | set_xf_layer_norm=set_xf_layer_norm, 65 | ), 66 | "proprioception": Identity(prop_input_dim), 67 | "robot_policy_action": Identity(robot_policy_output_dim), 68 | } 69 | if include_robot_policy_gripper_action_input: 70 | extractors["robot_policy_gripper_action"] = Embedding( 71 | num_embeddings=2, # open/close 72 | embedding_dim=robot_policy_gripper_action_embd_dim, 73 | ) 74 | 75 | self.feature_extractor = SimpleFeatureFusion( 76 | extractors=extractors, 77 | hidden_depth=feature_fusion_hidden_depth, 78 | hidden_dim=feature_fusion_hidden_dim, 79 | output_dim=feature_fusion_output_dim, 80 | activation=feature_fusion_activation, 81 | add_input_activation=feature_fusion_add_input_activation, 82 | add_output_activation=feature_fusion_add_output_activation, 83 | ) 84 | 85 | self.action_net = GMMHead( 86 | feature_fusion_output_dim, 87 | n_modes=action_net_gmm_n_modes, 88 | action_dim=action_dim, 89 | hidden_dim=action_net_hidden_dim, 90 | hidden_depth=action_net_hidden_depth, 91 | activation=action_net_activation, 92 | low_noise_eval=gmm_low_noise_eval, 93 | ) 94 | self.intervention_head = CategoricalNet( 95 | feature_fusion_output_dim, 96 | action_dim=2, # intervention or not 97 | hidden_dim=intervention_head_hidden_dim, 98 | hidden_depth=intervention_head_hidden_depth, 99 | activation=intervention_head_activation, 100 | ) 101 | if update_intervention_head_only: 102 | assert os.path.exists(ckpt_path_if_update_intervention_head_only) 103 | ckpt = torch.load( 104 | ckpt_path_if_update_intervention_head_only, map_location="cpu" 105 | ) 106 | 107 | feature_extractor_weighs = { 108 | k: v 109 | for k, v in ckpt["state_dict"].items() 110 | if k.startswith("residual_policy.feature_extractor") 111 | } 112 | load_state_dict( 113 | self.feature_extractor, 114 | feature_extractor_weighs, 115 | strip_prefix="residual_policy.feature_extractor.", 116 | strict=True, 117 | ) 118 | freeze_params(self.feature_extractor) 119 | 120 | action_net_weights = { 121 | k: v 122 | for k, v in ckpt["state_dict"].items() 123 | if k.startswith("residual_policy.action_net") 124 | } 125 | load_state_dict( 126 | self.action_net, 127 | action_net_weights, 128 | strip_prefix="residual_policy.action_net.", 129 | strict=True, 130 | ) 131 | freeze_params(self.action_net) 132 | 133 | 
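        # Note: with update_intervention_head_only=True, everything loaded
        # above is frozen, so only self.intervention_head (and nothing in the
        # feature extractor or GMM action head) receives gradients. This lets
        # the intervention classifier be retrained from a saved checkpoint
        # without disturbing the residual action predictions.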
self._deterministic_inference = deterministic_inference 134 | 135 | def forward(self, obs): 136 | feature = self.feature_extractor(obs) 137 | action_dist = self.action_net(feature) 138 | intervention_dist = self.intervention_head(feature) 139 | return action_dist, intervention_dist 140 | 141 | @torch.no_grad() 142 | def act(self, obs, deterministic=None): 143 | action_dist, intervention_dist = self.forward(obs) 144 | if deterministic is None: 145 | deterministic = self._deterministic_inference 146 | if deterministic: 147 | return action_dist.mode(), intervention_dist.mode() 148 | else: 149 | return action_dist.sample(), intervention_dist.sample() 150 | -------------------------------------------------------------------------------- /transic/residual/policy/pointnet_residual_policy.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | from transic.learn.policy import GMMHead, CategoricalNet 6 | from transic.learn.policy import BasePolicy 7 | from transic.nn.features import SimpleFeatureFusion, PointNet, Identity, Embedding 8 | from transic.utils.torch_utils import load_state_dict, freeze_params 9 | 10 | 11 | class PointNetResidualPolicy(BasePolicy): 12 | def __init__( 13 | self, 14 | *, 15 | point_channels: int = 3, 16 | subtract_point_mean: bool = False, 17 | add_ee_embd: bool = False, 18 | ee_embd_dim: int, 19 | pointnet_output_dim: int, 20 | pointnet_hidden_dim: int, 21 | pointnet_hidden_depth: int, 22 | pointnet_activation: str = "gelu", 23 | prop_input_dim: int, 24 | robot_policy_output_dim: int, 25 | include_robot_policy_gripper_action_input: bool, 26 | robot_policy_gripper_action_embd_dim: int, 27 | feature_fusion_hidden_depth: int = 1, 28 | feature_fusion_hidden_dim: int = 256, 29 | feature_fusion_output_dim: int = 256, 30 | feature_fusion_activation: str = "relu", 31 | feature_fusion_add_input_activation: bool = False, 32 | feature_fusion_add_output_activation: bool = False, 33 | action_dim: int, 34 | action_net_gmm_n_modes: int = 5, 35 | action_net_hidden_dim: int, 36 | action_net_hidden_depth: int, 37 | action_net_activation: str = "relu", 38 | intervention_head_hidden_dim: int, 39 | intervention_head_hidden_depth: int, 40 | intervention_head_activation: str = "relu", 41 | deterministic_inference: bool = True, 42 | gmm_low_noise_eval: bool = True, 43 | update_intervention_head_only: bool = False, 44 | ckpt_path_if_update_intervention_head_only: str = None, 45 | ): 46 | super().__init__() 47 | 48 | extractors = { 49 | "pcd": PointNet( 50 | n_coordinates=point_channels, 51 | add_ee_embd=add_ee_embd, 52 | ee_embd_dim=ee_embd_dim, 53 | output_dim=pointnet_output_dim, 54 | hidden_dim=pointnet_hidden_dim, 55 | hidden_depth=pointnet_hidden_depth, 56 | activation=pointnet_activation, 57 | subtract_mean=subtract_point_mean, 58 | ), 59 | "proprioception": Identity(prop_input_dim), 60 | "robot_policy_action": Identity(robot_policy_output_dim), 61 | } 62 | if include_robot_policy_gripper_action_input: 63 | extractors["robot_policy_gripper_action"] = Embedding( 64 | num_embeddings=2, # open/close 65 | embedding_dim=robot_policy_gripper_action_embd_dim, 66 | ) 67 | self.feature_extractor = SimpleFeatureFusion( 68 | extractors=extractors, 69 | hidden_depth=feature_fusion_hidden_depth, 70 | hidden_dim=feature_fusion_hidden_dim, 71 | output_dim=feature_fusion_output_dim, 72 | activation=feature_fusion_activation, 73 | add_input_activation=feature_fusion_add_input_activation, 74 | 
add_output_activation=feature_fusion_add_output_activation, 75 | ) 76 | 77 | self.action_net = GMMHead( 78 | feature_fusion_output_dim, 79 | n_modes=action_net_gmm_n_modes, 80 | action_dim=action_dim, 81 | hidden_dim=action_net_hidden_dim, 82 | hidden_depth=action_net_hidden_depth, 83 | activation=action_net_activation, 84 | low_noise_eval=gmm_low_noise_eval, 85 | ) 86 | self.intervention_head = CategoricalNet( 87 | feature_fusion_output_dim, 88 | action_dim=2, # intervention or not 89 | hidden_dim=intervention_head_hidden_dim, 90 | hidden_depth=intervention_head_hidden_depth, 91 | activation=intervention_head_activation, 92 | ) 93 | if update_intervention_head_only: 94 | assert os.path.exists(ckpt_path_if_update_intervention_head_only) 95 | ckpt = torch.load( 96 | ckpt_path_if_update_intervention_head_only, map_location="cpu" 97 | ) 98 | 99 | feature_extractor_weighs = { 100 | k: v 101 | for k, v in ckpt["state_dict"].items() 102 | if k.startswith("residual_policy.feature_extractor") 103 | } 104 | load_state_dict( 105 | self.feature_extractor, 106 | feature_extractor_weighs, 107 | strip_prefix="residual_policy.feature_extractor.", 108 | strict=True, 109 | ) 110 | freeze_params(self.feature_extractor) 111 | 112 | action_net_weights = { 113 | k: v 114 | for k, v in ckpt["state_dict"].items() 115 | if k.startswith("residual_policy.action_net") 116 | } 117 | load_state_dict( 118 | self.action_net, 119 | action_net_weights, 120 | strip_prefix="residual_policy.action_net.", 121 | strict=True, 122 | ) 123 | freeze_params(self.action_net) 124 | 125 | self._deterministic_inference = deterministic_inference 126 | 127 | def forward(self, obs): 128 | feature = self.feature_extractor(obs) 129 | action_dist = self.action_net(feature) 130 | intervention_dist = self.intervention_head(feature) 131 | return action_dist, intervention_dist 132 | 133 | @torch.no_grad() 134 | def act(self, obs, deterministic=None): 135 | action_dist, intervention_dist = self.forward(obs) 136 | if deterministic is None: 137 | deterministic = self._deterministic_inference 138 | if deterministic: 139 | return action_dist.mode(), intervention_dist.mode() 140 | else: 141 | return action_dist.sample(), intervention_dist.sample() 142 | -------------------------------------------------------------------------------- /transic/rl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/rl/__init__.py -------------------------------------------------------------------------------- /transic/rl/models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | import torch 4 | from gym import spaces 5 | import rl_games.common.divergence as divergence 6 | from rl_games.common.extensions.distributions import CategoricalMasked 7 | from rl_games.algos_torch.running_mean_std import RunningMeanStd 8 | 9 | from transic.rl.moving_avg import RunningMeanStdObs 10 | 11 | 12 | class BaseModel: 13 | def __init__(self, model_class): 14 | self.model_class = model_class 15 | 16 | def is_rnn(self): 17 | return False 18 | 19 | def is_separate_critic(self): 20 | return False 21 | 22 | def get_value_layer(self): 23 | return None 24 | 25 | def build(self, config): 26 | obs_shape = config["input_shape"] 27 | normalize_value = config.get("normalize_value", False) 28 | normalize_input = config.get("normalize_input", False) 29 | 
normalize_input_excluded_keys = config.get( 30 | "normalize_input_excluded_keys", None 31 | ) 32 | value_size = config.get("value_size", 1) 33 | return self.Network( 34 | self.network_builder.build(self.model_class, **config), 35 | obs_shape=obs_shape, 36 | normalize_value=normalize_value, 37 | normalize_input=normalize_input, 38 | value_size=value_size, 39 | normalize_input_excluded_keys=normalize_input_excluded_keys, 40 | ) 41 | 42 | 43 | class BaseModelNetwork(nn.Module): 44 | def __init__( 45 | self, 46 | obs_shape, 47 | normalize_value, 48 | normalize_input, 49 | value_size, 50 | normalize_input_excluded_keys=None, 51 | ): 52 | nn.Module.__init__(self) 53 | self.obs_shape = obs_shape 54 | self.normalize_value = normalize_value 55 | self.normalize_input = normalize_input 56 | self.value_size = value_size 57 | 58 | if normalize_value: 59 | self.value_mean_std = RunningMeanStd( 60 | (self.value_size,) 61 | ) # GeneralizedMovingStats((self.value_size,)) # 62 | if normalize_input: 63 | if isinstance(obs_shape, spaces.Dict): 64 | self.running_mean_std = RunningMeanStdObs( 65 | obs_shape, exclude_keys=normalize_input_excluded_keys 66 | ) 67 | else: 68 | self.running_mean_std = RunningMeanStd(obs_shape) 69 | 70 | def norm_obs(self, observation): 71 | with torch.no_grad(): 72 | return ( 73 | self.running_mean_std(observation) 74 | if self.normalize_input 75 | else observation 76 | ) 77 | 78 | def denorm_value(self, value): 79 | with torch.no_grad(): 80 | return ( 81 | self.value_mean_std(value, denorm=True) 82 | if self.normalize_value 83 | else value 84 | ) 85 | 86 | 87 | class ModelA2C(BaseModel): 88 | def __init__(self, network): 89 | BaseModel.__init__(self, "a2c") 90 | self.network_builder = network 91 | 92 | class Network(BaseModelNetwork): 93 | def __init__(self, a2c_network, **kwargs): 94 | BaseModelNetwork.__init__(self, **kwargs) 95 | self.a2c_network = a2c_network 96 | 97 | def is_rnn(self): 98 | return self.a2c_network.is_rnn() 99 | 100 | def get_default_rnn_state(self): 101 | return self.a2c_network.get_default_rnn_state() 102 | 103 | def get_value_layer(self): 104 | return self.a2c_network.get_value_layer() 105 | 106 | def kl(self, p_dict, q_dict): 107 | p = p_dict["logits"] 108 | q = q_dict["logits"] 109 | return divergence.d_kl_discrete(p, q) 110 | 111 | def forward(self, input_dict): 112 | is_train = input_dict.get("is_train", True) 113 | action_masks = input_dict.get("action_masks", None) 114 | prev_actions = input_dict.get("prev_actions", None) 115 | input_dict["obs"] = self.norm_obs(input_dict["obs"]) 116 | logits, value, states = self.a2c_network(input_dict) 117 | 118 | if is_train: 119 | categorical = CategoricalMasked(logits=logits, masks=action_masks) 120 | prev_neglogp = -categorical.log_prob(prev_actions) 121 | entropy = categorical.entropy() 122 | result = { 123 | "prev_neglogp": torch.squeeze(prev_neglogp), 124 | "logits": categorical.logits, 125 | "values": value, 126 | "entropy": entropy, 127 | "rnn_states": states, 128 | } 129 | return result 130 | else: 131 | categorical = CategoricalMasked(logits=logits, masks=action_masks) 132 | selected_action = categorical.sample().long() 133 | neglogp = -categorical.log_prob(selected_action) 134 | result = { 135 | "neglogpacs": torch.squeeze(neglogp), 136 | "values": self.denorm_value(value), 137 | "actions": selected_action, 138 | "logits": categorical.logits, 139 | "rnn_states": states, 140 | } 141 | return result 142 | 143 | 144 | class ModelA2CContinuousLogStd(BaseModel): 145 | def __init__(self, network): 146 | 
BaseModel.__init__(self, "a2c") 147 | self.network_builder = network 148 | 149 | class Network(BaseModelNetwork): 150 | def __init__(self, a2c_network, **kwargs): 151 | BaseModelNetwork.__init__(self, **kwargs) 152 | self.a2c_network = a2c_network 153 | 154 | def is_rnn(self): 155 | return self.a2c_network.is_rnn() 156 | 157 | def get_value_layer(self): 158 | return self.a2c_network.get_value_layer() 159 | 160 | def get_default_rnn_state(self): 161 | return self.a2c_network.get_default_rnn_state() 162 | 163 | def forward(self, input_dict): 164 | is_train = input_dict.get("is_train", True) 165 | prev_actions = input_dict.get("prev_actions", None) 166 | input_dict["obs"] = self.norm_obs(input_dict["obs"]) 167 | mu, logstd, value, states = self.a2c_network(input_dict) 168 | sigma = torch.exp(logstd) 169 | distr = torch.distributions.Normal(mu, sigma, validate_args=False) 170 | if is_train: 171 | entropy = distr.entropy().sum(dim=-1) 172 | prev_neglogp = self.neglogp(prev_actions, mu, sigma, logstd) 173 | result = { 174 | "prev_neglogp": torch.squeeze(prev_neglogp), 175 | "values": value, 176 | "entropy": entropy, 177 | "rnn_states": states, 178 | "mus": mu, 179 | "sigmas": sigma, 180 | } 181 | return result 182 | else: 183 | selected_action = distr.sample() 184 | neglogp = self.neglogp(selected_action, mu, sigma, logstd) 185 | result = { 186 | "neglogpacs": torch.squeeze(neglogp), 187 | "values": self.denorm_value(value), 188 | "actions": selected_action, 189 | "rnn_states": states, 190 | "mus": mu, 191 | "sigmas": sigma, 192 | } 193 | return result 194 | 195 | def neglogp(self, x, mean, std, logstd): 196 | return ( 197 | 0.5 * (((x - mean) / std) ** 2).sum(dim=-1) 198 | + 0.5 * np.log(2.0 * np.pi) * x.size()[-1] 199 | + logstd.sum(dim=-1) 200 | ) 201 | -------------------------------------------------------------------------------- /transic/rl/moving_avg.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import torch.nn as nn 4 | from rl_games.algos_torch.running_mean_std import RunningMeanStd 5 | from gym import spaces 6 | 7 | 8 | class RunningMeanStdObs(nn.Module): 9 | def __init__( 10 | self, 11 | insize, 12 | epsilon=1e-05, 13 | per_channel=False, 14 | norm_only=False, 15 | exclude_keys: list | None = None, 16 | ): 17 | assert isinstance(insize, spaces.Dict) 18 | exclude_keys = exclude_keys or [] 19 | super(RunningMeanStdObs, self).__init__() 20 | self.running_mean_std = nn.ModuleDict( 21 | { 22 | k: RunningMeanStd(v.shape, epsilon, per_channel, norm_only) 23 | for k, v in insize.items() 24 | if k not in exclude_keys 25 | } 26 | ) 27 | self._exclude_keys = exclude_keys 28 | 29 | def forward(self, input, denorm=False): 30 | res = { 31 | k: self.running_mean_std[k](v, denorm) if k not in self._exclude_keys else v 32 | for k, v in input.items() 33 | } 34 | return res 35 | -------------------------------------------------------------------------------- /transic/rl/runner.py: -------------------------------------------------------------------------------- 1 | from rl_games.torch_runner import Runner as _Runner, _override_sigma 2 | 3 | from transic.rl.agent import PPOAgent 4 | from transic.rl.player import MyPPOPlayerContinuous as PPOPlayer 5 | 6 | 7 | def _restore(agent, args, is_train_restore: bool): 8 | if ( 9 | "checkpoint" in args 10 | and args["checkpoint"] is not None 11 | and args["checkpoint"] != "" 12 | ): 13 | set_epoch = args.get("from_ckpt_epoch", True) 14 | if is_train_restore: 15 | 
agent.restore(args["checkpoint"], set_epoch) 16 | else: 17 | agent.restore(args["checkpoint"]) 18 | 19 | 20 | class Runner(_Runner): 21 | def __init__(self, algo_observer=None): 22 | super().__init__(algo_observer) 23 | self.algo_factory.register_builder("ppo", lambda **kwargs: PPOAgent(**kwargs)) 24 | self.player_factory.register_builder( 25 | "ppo", lambda **kwargs: PPOPlayer(**kwargs) 26 | ) 27 | 28 | def run_train(self, args): 29 | print("Started to train") 30 | agent = self.algo_factory.create( 31 | self.algo_name, base_name="run", params=self.params 32 | ) 33 | _restore(agent, args, is_train_restore=True) 34 | _override_sigma(agent, args) 35 | agent.train() 36 | 37 | def run_play(self, args): 38 | print("Started to play") 39 | save_rollouts_cfg = args.get("save_rollouts", {}) 40 | player = self.create_player(save_rollouts_cfg) 41 | _restore(player, args, is_train_restore=False) 42 | _override_sigma(player, args) 43 | player.run() 44 | 45 | def create_player(self, save_rollouts_cfg): 46 | return self.player_factory.create( 47 | self.algo_name, params=self.params, **save_rollouts_cfg 48 | ) 49 | -------------------------------------------------------------------------------- /transic/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transic-robot/transic/9af4ff7a392d9b033bb3a05d16f1aaa1aa2472d6/transic/utils/__init__.py -------------------------------------------------------------------------------- /transic/utils/config_utils.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from omegaconf import OmegaConf 3 | 4 | 5 | def is_sequence(obj): 6 | """ 7 | Returns: 8 | True if the sequence is a collections.Sequence and not a string. 
9 | """ 10 | return isinstance(obj, collections.abc.Sequence) and not isinstance(obj, str) 11 | 12 | 13 | def is_mapping(obj): 14 | """ 15 | Returns: 16 | True if the sequence is a collections.Mapping 17 | """ 18 | return isinstance(obj, collections.abc.Mapping) 19 | 20 | 21 | def omegaconf_to_dict(cfg, resolve: bool = True, enum_to_str: bool = False): 22 | """ 23 | Convert arbitrary nested omegaconf objects to primitive containers 24 | 25 | WARNING: cannot use tree lib because it gets confused on DictConfig and ListConfig 26 | """ 27 | kw = dict(resolve=resolve, enum_to_str=enum_to_str) 28 | if OmegaConf.is_config(cfg): 29 | return OmegaConf.to_container(cfg, **kw) 30 | elif is_sequence(cfg): 31 | return type(cfg)(omegaconf_to_dict(c, **kw) for c in cfg) 32 | elif is_mapping(cfg): 33 | return {k: omegaconf_to_dict(c, **kw) for k, c in cfg.items()} 34 | else: 35 | return cfg 36 | -------------------------------------------------------------------------------- /transic/utils/misc_utils.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Callable, Literal 2 | 3 | import fnmatch 4 | 5 | 6 | def _match_patterns_helper(element, patterns): 7 | for p in patterns: 8 | if callable(p) and p(element): 9 | return True 10 | if fnmatch.fnmatch(element, p): 11 | return True 12 | return False 13 | 14 | 15 | def match_patterns( 16 | item: str, 17 | include: Union[str, List[str], Callable, List[Callable], None] = None, 18 | exclude: Union[str, List[str], Callable, List[Callable], None] = None, 19 | *, 20 | precedence: Literal["include", "exclude"] = "exclude", 21 | ): 22 | """ 23 | Args: 24 | include: None to disable `include` filter and delegate to exclude 25 | precedence: "include" or "exclude" 26 | """ 27 | assert precedence in ["include", "exclude"] 28 | if exclude is None: 29 | exclude = [] 30 | if isinstance(exclude, (str, Callable)): 31 | exclude = [exclude] 32 | if isinstance(include, (str, Callable)): 33 | include = [include] 34 | if include is None: 35 | # exclude is the sole veto vote 36 | return not _match_patterns_helper(item, exclude) 37 | 38 | if precedence == "include": 39 | return _match_patterns_helper(item, include) 40 | else: 41 | if _match_patterns_helper(item, exclude): 42 | return False 43 | else: 44 | return _match_patterns_helper(item, include) 45 | -------------------------------------------------------------------------------- /transic/utils/reformat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2023, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 
/transic/utils/reformat.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2018-2023, NVIDIA Corporation
2 | # All rights reserved.
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 
29 | from omegaconf import DictConfig, OmegaConf
30 | from typing import Dict
31 | 
32 | 
33 | def omegaconf_to_dict(d: DictConfig) -> Dict:
34 |     """Converts an omegaconf DictConfig to a python Dict, respecting variable interpolation."""
35 |     ret = {}
36 |     for k, v in d.items():
37 |         if isinstance(v, DictConfig):
38 |             ret[k] = omegaconf_to_dict(v)
39 |         else:
40 |             ret[k] = v
41 |     return ret
42 | 
43 | 
44 | def print_dict(val, nesting: int = -4, start: bool = True):
45 |     """Outputs a nested dictionary."""
46 |     if type(val) == dict:
47 |         if not start:
48 |             print("")
49 |         nesting += 4
50 |         for k in val:
51 |             print(nesting * " ", end="")
52 |             print(k, end=": ")
53 |             print_dict(val[k], nesting, start=False)
54 |     else:
55 |         print(val)
56 | 
57 | 
58 | # EOF
59 | 
--------------------------------------------------------------------------------
/transic/utils/torch_utils.py:
--------------------------------------------------------------------------------
1 | import tree
2 | import torch
3 | import torch.nn as nn
4 | 
5 | from transic.utils.tree_utils import tree_value_at_path
6 | 
7 | 
8 | def load_state_dict(objects, states, strip_prefix=None, strict=False):
9 |     """
10 |     Args:
11 |         strict: objects and states must match exactly
12 |         strip_prefix: only match the keys that have the prefix, and strip it
13 |     """
14 | 
15 |     def _load(paths, obj):
16 |         if not _implements_method(obj, "load_state_dict"):
17 |             raise ValueError(
18 |                 f"Object {type(obj)} does not support load_state_dict() method"
19 |             )
20 |         try:
21 |             state = tree_value_at_path(states, paths)
22 |         except ValueError:  # paths do not exist in `states` structure
23 |             if strict:
24 |                 raise
25 |             else:
26 |                 return
27 |         if strip_prefix:
28 |             assert isinstance(strip_prefix, str)
29 |             state = {
30 |                 k[len(strip_prefix) :]: v
31 |                 for k, v in state.items()
32 |                 if k.startswith(strip_prefix)
33 |             }
34 |         if isinstance(obj, nn.Module):
35 |             return obj.load_state_dict(state, strict=strict)
36 |         else:
37 |             return obj.load_state_dict(state)
38 | 
39 |     return tree.map_structure_with_path(_load, objects)
40 | 
41 | 
42 | def _implements_method(object, method: str):
43 |     """
44 |     Returns:
45 |         True if object implements a method
46 |     """
47 |     return hasattr(object, method) and callable(getattr(object, method))
48 | 
49 | 
50 | def set_requires_grad(model, requires_grad):
51 |     if torch.is_tensor(model):
52 |         model.requires_grad = requires_grad
53 |     else:
54 |         for param in model.parameters():
55 |             param.requires_grad = requires_grad
56 | 
57 | 
58 | def freeze_params(model):
59 |     set_requires_grad(model, False)
60 |     if not torch.is_tensor(model):
61 |         model.eval()
62 | 
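--------------------------------------------------------------------------------
This prefix-stripping loader is what lets PointNetResidualPolicy (earlier in
this dump) pull only the feature-extractor and action-head weights out of a
full training checkpoint. A small self-contained sketch, with the module and
key names invented for illustration:

```python
import torch
import torch.nn as nn

from transic.utils.torch_utils import load_state_dict, freeze_params

net = nn.Linear(4, 2)

# Pretend this came from a larger checkpoint: relevant keys carry a
# "policy.net." prefix; unrelated keys are dropped by the prefix filter.
ckpt_state = {
    "policy.net.weight": torch.zeros(2, 4),
    "policy.net.bias": torch.zeros(2),
    "policy.head.weight": torch.ones(3, 2),  # filtered out by strip_prefix
}

load_state_dict(net, ckpt_state, strip_prefix="policy.net.", strict=True)
freeze_params(net)  # requires_grad=False on all params, module switched to eval()

assert not any(p.requires_grad for p in net.parameters())
```
--------------------------------------------------------------------------------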
/transic/utils/tree_utils.py:
--------------------------------------------------------------------------------
1 | from typing import List, TypeVar, Iterable, Tuple
2 | import numpy as np
3 | 
4 | try:
5 |     import tree
6 | 
7 | except ImportError:
8 |     raise ImportError("Please install dm_tree first: `pip install dm_tree`")
9 | 
10 | ElementType = TypeVar("ElementType")
11 | 
12 | 
13 | def fast_map_structure(func, *structure):
14 |     """Faster map_structure implementation which skips some error checking."""
15 |     flat_structure = (tree.flatten(s) for s in structure)
16 |     entries = zip(*flat_structure)
17 |     # Arbitrarily choose one of the structures of the original sequence (the last)
18 |     # to match the structure for the flattened sequence.
19 |     return tree.unflatten_as(structure[-1], [func(*x) for x in entries])
20 | 
21 | 
22 | def stack_sequence_fields(sequence: Iterable[ElementType]) -> ElementType:
23 |     """Stacks a list of identically nested objects.
24 | 
25 |     This takes a sequence of identically nested objects and returns a single
26 |     nested object whose ith leaf is a stacked numpy array of the corresponding
27 |     ith leaf from each element of the sequence.
28 | 
29 |     For example, if `sequence` is:
30 | 
31 |     ```python
32 |     [{
33 |         'action': np.array([1.0]),
34 |         'observation': (np.array([0.0, 1.0, 2.0]),),
35 |         'reward': 1.0
36 |     }, {
37 |         'action': np.array([0.5]),
38 |         'observation': (np.array([1.0, 2.0, 3.0]),),
39 |         'reward': 0.0
40 |     }, {
41 |         'action': np.array([0.3]),
42 |         'observation': (np.array([2.0, 3.0, 4.0]),),
43 |         'reward': 0.5
44 |     }]
45 |     ```
46 | 
47 |     Then this function will return:
48 | 
49 |     ```python
50 |     {
51 |         'action': np.array([....])        # array shape = [3 x 1]
52 |         'observation': (np.array([...]),) # array shape = [3 x 3]
53 |         'reward': np.array([...])         # array shape = [3]
54 |     }
55 |     ```
56 | 
57 |     Note that the 'observation' entry in the above example has two levels of
58 |     nesting, i.e. it is a tuple of arrays.
59 | 
60 |     Args:
61 |         sequence: a list of identically nested objects.
62 | 
63 |     Returns:
64 |         A nested object with stacked numpy arrays at the leaves.
65 | 
66 |     Raises:
67 |         ValueError: If `sequence` is an empty sequence.
68 |     """
69 |     # Handle empty input sequences.
70 |     if not sequence:
71 |         raise ValueError("Input sequence must not be empty")
72 | 
73 |     # Default to asarray when arrays don't have the same shape to be compatible
74 |     # with old behaviour.
75 |     try:
76 |         return fast_map_structure(lambda *values: np.stack(values), *sequence)
77 |     except ValueError:
78 |         return fast_map_structure(lambda *values: np.asarray(values), *sequence)
79 | 
80 | 
81 | def unstack_sequence_fields(struct: ElementType, batch_size: int) -> List[ElementType]:
82 |     """Converts a struct of batched arrays to a list of structs.
83 | 
84 |     This is effectively the inverse of `stack_sequence_fields`.
85 | 
86 |     Args:
87 |         struct: An (arbitrarily nested) structure of arrays.
88 |         batch_size: The length of the leading dimension of each array in the struct.
89 |             This is assumed to be static and known.
90 | 
91 |     Returns:
92 |         A list of structs with the same structure as `struct`, where each leaf node
93 |         is an unbatched element of the original leaf node.
94 | """ 95 | 96 | return [tree.map_structure(lambda s, i=i: s[i], struct) for i in range(batch_size)] 97 | 98 | 99 | def tree_value_at_path(obj, paths: Tuple): 100 | try: 101 | for p in paths: 102 | obj = obj[p] 103 | return obj 104 | except Exception as e: 105 | raise ValueError(f"{e}\n\n-- Incorrect nested path {paths} for object: {obj}.") 106 | -------------------------------------------------------------------------------- /transic/utils/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2023, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 
29 | # python
30 | # import pwd
31 | import getpass
32 | import tempfile
33 | import time
34 | from collections import OrderedDict
35 | from os.path import join
36 | 
37 | import numpy as np
38 | import torch
39 | import random
40 | import os
41 | 
42 | 
43 | def retry(times, exceptions):
44 |     """
45 |     Retry Decorator https://stackoverflow.com/a/64030200/1645784
46 |     Retries the wrapped function/method `times` times if the exceptions listed
47 |     in ``exceptions`` are thrown
48 |     :param times: The number of times to repeat the wrapped function/method
49 |     :type times: Int
50 |     :param exceptions: Lists of exceptions that trigger a retry attempt
51 |     :type exceptions: Tuple of Exceptions
52 |     """
53 | 
54 |     def decorator(func):
55 |         def newfn(*args, **kwargs):
56 |             attempt = 0
57 |             while attempt < times:
58 |                 try:
59 |                     return func(*args, **kwargs)
60 |                 except exceptions:
61 |                     print(
62 |                         f"Exception thrown when attempting to run {func}, attempt {attempt} out of {times}"
63 |                     )
64 |                     time.sleep(min(2**attempt, 30))
65 |                     attempt += 1
66 | 
67 |             return func(*args, **kwargs)
68 | 
69 |         return newfn
70 | 
71 |     return decorator
72 | 
73 | 
74 | def flatten_dict(d, prefix="", separator="."):
75 |     res = dict()
76 |     for key, value in d.items():
77 |         if isinstance(value, (dict, OrderedDict)):
78 |             res.update(flatten_dict(value, prefix + key + separator, separator))
79 |         else:
80 |             res[prefix + key] = value
81 | 
82 |     return res
83 | 
84 | 
85 | def set_np_formatting():
86 |     """formats numpy print"""
87 |     np.set_printoptions(
88 |         edgeitems=30,
89 |         infstr="inf",
90 |         linewidth=4000,
91 |         nanstr="nan",
92 |         precision=2,
93 |         suppress=False,
94 |         threshold=10000,
95 |         formatter=None,
96 |     )
97 | 
98 | 
99 | def set_seed(seed, torch_deterministic=False, rank=0):
100 |     """set seed across modules"""
101 |     if seed == -1 and torch_deterministic:
102 |         seed = 42 + rank
103 |     elif seed == -1:
104 |         seed = np.random.randint(0, 10000)
105 |     else:
106 |         seed = seed + rank
107 | 
108 |     print("Setting seed: {}".format(seed))
109 | 
110 |     random.seed(seed)
111 |     np.random.seed(seed)
112 |     torch.manual_seed(seed)
113 |     os.environ["PYTHONHASHSEED"] = str(seed)
114 |     torch.cuda.manual_seed(seed)
115 |     torch.cuda.manual_seed_all(seed)
116 | 
117 |     if torch_deterministic:
118 |         # refer to https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
119 |         os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
120 |         torch.backends.cudnn.benchmark = False
121 |         torch.backends.cudnn.deterministic = True
122 |         torch.use_deterministic_algorithms(True)
123 |     else:
124 |         torch.backends.cudnn.benchmark = True
125 |         torch.backends.cudnn.deterministic = False
126 | 
127 |     return seed
128 | 
129 | 
130 | def nested_dict_set_attr(d, key, val):
131 |     pre, _, post = key.partition(".")
132 |     if post:
133 |         nested_dict_set_attr(d[pre], post, val)
134 |     else:
135 |         d[key] = val
136 | 
137 | 
138 | def nested_dict_get_attr(d, key):
139 |     pre, _, post = key.partition(".")
140 |     if post:
141 |         return nested_dict_get_attr(d[pre], post)
142 |     else:
143 |         return d[key]
144 | 
145 | 
146 | def ensure_dir_exists(path):
147 |     if not os.path.exists(path):
148 |         os.makedirs(path)
149 |     return path
150 | 
151 | 
152 | def safe_ensure_dir_exists(path):
153 |     """Should be safer in multi-threaded environment."""
154 |     try:
155 |         return ensure_dir_exists(path)
156 |     except FileExistsError:
157 |         return path
158 | 
159 | 
160 | def get_username():
161 |     uid = os.getuid()
162 |     try:
163 |         return getpass.getuser()
164 |     except KeyError:
165 |         # worst case scenario - let's
just use uid 166 | return str(uid) 167 | 168 | 169 | def project_tmp_dir(): 170 | tmp_dir_name = f"ige_{get_username()}" 171 | return safe_ensure_dir_exists(join(tempfile.gettempdir(), tmp_dir_name)) 172 | 173 | 174 | # EOF 175 | -------------------------------------------------------------------------------- /transic/utils/wandb_utils.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import torch 3 | import wandb 4 | from rl_games.common.algo_observer import AlgoObserver 5 | 6 | from transic.utils.utils import retry 7 | from transic.utils.reformat import omegaconf_to_dict 8 | 9 | 10 | class WandbAlgoObserver(AlgoObserver): 11 | """Need this to propagate the correct experiment name after initialization.""" 12 | 13 | def __init__(self, cfg): 14 | super().__init__() 15 | self.cfg = cfg 16 | 17 | def before_init(self, base_name, config, experiment_name): 18 | """ 19 | Must call initialization of Wandb before RL-games summary writer is initialized, otherwise 20 | sync_tensorboard does not work. 21 | """ 22 | 23 | import wandb 24 | 25 | wandb_unique_id = f"uid_{experiment_name}" 26 | print(f"Wandb using unique id {wandb_unique_id}") 27 | 28 | cfg = self.cfg 29 | 30 | # this can fail occasionally, so we try a couple more times 31 | @retry(3, exceptions=(Exception,)) 32 | def init_wandb(): 33 | wandb.init( 34 | project=cfg.wandb_project, 35 | entity=cfg.wandb_entity, 36 | group=cfg.wandb_group, 37 | tags=cfg.wandb_tags, 38 | sync_tensorboard=True, 39 | id=wandb_unique_id, 40 | name=experiment_name, 41 | resume=True, 42 | settings=wandb.Settings(start_method="fork"), 43 | ) 44 | 45 | if cfg.wandb_logcode_dir: 46 | wandb.run.log_code(root=cfg.wandb_logcode_dir) 47 | print("wandb running directory........", wandb.run.dir) 48 | 49 | print("Initializing WandB...") 50 | try: 51 | init_wandb() 52 | except Exception as exc: 53 | print(f"Could not initialize WandB! 
{exc}") 54 | 55 | if isinstance(self.cfg, dict): 56 | wandb.config.update(self.cfg, allow_val_change=True) 57 | else: 58 | wandb.config.update(omegaconf_to_dict(self.cfg), allow_val_change=True) 59 | 60 | 61 | class WandbVideoCaptureWrapper(gym.Wrapper): 62 | def __init__( 63 | self, 64 | env, 65 | n_parallel_recorders: int = 1, 66 | n_successful_videos_to_record: int = 50, 67 | ): 68 | super().__init__(env) 69 | n_parallel_recorders = min(n_parallel_recorders, env.num_envs) 70 | self._n_recorders = n_parallel_recorders 71 | self._videos = [[] for _ in range(n_parallel_recorders)] 72 | self._rcd_idxs = [ 73 | i 74 | for i in range(env.num_envs) 75 | if i % (env.num_envs // n_parallel_recorders) == 0 76 | ][:n_parallel_recorders] 77 | self._n_video_saved = 0 78 | self._n_successful_video_saved = 0 79 | self._n_successful_videos_to_record = n_successful_videos_to_record 80 | 81 | def reset(self, **kwargs): 82 | self._videos = [[] for _ in range(self._n_recorders)] 83 | return super().reset(**kwargs) 84 | 85 | def step(self, action): 86 | obs, reward, done, info = super().step(action) 87 | for i, idx in enumerate(self._rcd_idxs): 88 | self._videos[i].append(self.env.camera_obs[idx].clone()) 89 | if torch.any(done): 90 | for i, idx in enumerate(self._rcd_idxs): 91 | if done[idx]: 92 | video = torch.stack(self._videos[i])[ 93 | ..., :-1 94 | ] # (T, H, W, C), RGBA -> RGB 95 | video = video.to(dtype=torch.uint8) 96 | video = ( 97 | video.permute(0, 3, 1, 2).detach().cpu().numpy() 98 | ) # (T, C, H, W) 99 | video = wandb.Video(video, fps=10, format="mp4") 100 | succeeded = self.env.success_buf 101 | failed = self.env.failure_buf 102 | status = "timeout" 103 | if succeeded[idx]: 104 | status = "success" 105 | self._n_successful_video_saved += 1 106 | elif failed[idx]: 107 | status = "failure" 108 | wandb.log( 109 | {f"test_video/video-{self._n_video_saved}_{status}": video} 110 | ) 111 | self._n_video_saved += 1 112 | self._videos[i] = [] 113 | if ( 114 | self._n_successful_video_saved 115 | >= self._n_successful_videos_to_record 116 | ): 117 | exit() 118 | return obs, reward, done, info 119 | --------------------------------------------------------------------------------