├── utils ├── __init__.py ├── losess.py ├── ransac.py └── frame_utils.py ├── isaacsim ├── .gitignore ├── requirements.txt ├── pattern.png ├── replicate │ ├── __init__.py │ └── std_object.py ├── render.py ├── README.md ├── config │ └── hssd.yaml ├── utils_func.py ├── replicator.py └── custom_writer.py ├── datasets ├── .gitignore ├── Real │ └── xiaomeng │ │ ├── 0000_ir_l.png │ │ ├── 0000_ir_r.png │ │ ├── 0000_rgb.png │ │ ├── 0000_depth.png │ │ └── 0000_raw_disparity.png └── README.md ├── raw_aligned.png ├── assets ├── in-the-wild.png └── examples │ ├── 0000_ir_l.png │ ├── 0000_ir_r.png │ ├── 0000_rgb.png │ └── 0000_depth.png ├── .gitignore ├── conf ├── config.yaml └── task │ ├── eval_ldm_his.yaml │ ├── eval_his_sim.yaml │ ├── eval_ldm_mixed.yaml │ ├── eval_dreds_reprod.yaml │ ├── eval_ldm_mixed_rgb+raw.yaml │ ├── eval_ldm_mixed_cond_rgbd.yaml │ ├── eval_clearpose.yaml │ ├── eval_syntodd_rgbd.yaml │ ├── eval_sceneflow.yaml │ ├── eval_ldm_mono.yaml │ ├── eval_ldm.yaml │ ├── train_ldm_mixed.yaml │ ├── train_ldm_mono.yaml │ ├── train_sceneflow.yaml │ ├── train_hiss.yaml │ ├── train_ldm_mixed_rgb+raw.yaml │ ├── train_dreds_reprod.yaml │ ├── train_ldm_mixed_left+right+raw.yaml │ ├── train_ldm_mixed_cond_rgbd.yaml │ ├── train_clearpose.yaml │ ├── train_ldm_mixed_gapartnet.yaml │ └── train_syntodd_rgbd.yaml ├── pyrightconfig.json ├── scripts ├── check_sceneflow.py └── check_stereo.py ├── data ├── dataset.py ├── data_loader.py └── augmentor.py ├── core ├── praser.py └── resample.py ├── distributed_evaluate.py ├── README.md ├── config.py └── inference.py /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /isaacsim/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | output_ir 3 | **/*.pyc -------------------------------------------------------------------------------- /isaacsim/requirements.txt: -------------------------------------------------------------------------------- 1 | hydra-core==1.3.2 2 | transforms3d -------------------------------------------------------------------------------- /datasets/.gitignore: -------------------------------------------------------------------------------- 1 | clearpose** 2 | DREDS** 3 | HISS** 4 | sceneflow** -------------------------------------------------------------------------------- /raw_aligned.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/raw_aligned.png -------------------------------------------------------------------------------- /isaacsim/pattern.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/isaacsim/pattern.png -------------------------------------------------------------------------------- /assets/in-the-wild.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/assets/in-the-wild.png -------------------------------------------------------------------------------- /assets/examples/0000_ir_l.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/assets/examples/0000_ir_l.png -------------------------------------------------------------------------------- /assets/examples/0000_ir_r.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/assets/examples/0000_ir_r.png -------------------------------------------------------------------------------- /assets/examples/0000_rgb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/assets/examples/0000_rgb.png -------------------------------------------------------------------------------- /assets/examples/0000_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/assets/examples/0000_depth.png -------------------------------------------------------------------------------- /datasets/Real/xiaomeng/0000_ir_l.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/datasets/Real/xiaomeng/0000_ir_l.png -------------------------------------------------------------------------------- /datasets/Real/xiaomeng/0000_ir_r.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/datasets/Real/xiaomeng/0000_ir_r.png -------------------------------------------------------------------------------- /datasets/Real/xiaomeng/0000_rgb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/datasets/Real/xiaomeng/0000_rgb.png -------------------------------------------------------------------------------- /datasets/Real/xiaomeng/0000_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/datasets/Real/xiaomeng/0000_depth.png -------------------------------------------------------------------------------- /isaacsim/replicate/__init__.py: -------------------------------------------------------------------------------- 1 | from .scene_replicator import Replicator 2 | from .std_object import STDObjectReplicator 3 | 4 | -------------------------------------------------------------------------------- /datasets/Real/xiaomeng/0000_raw_disparity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/datasets/Real/xiaomeng/0000_raw_disparity.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/*.pyc 2 | experiments* 3 | checkpoint 4 | _outputs* 5 | _outputs/** 6 | checkpoint/** 7 | test_* 8 | backup 9 | bad_sim* 10 | .vscode 11 | -------------------------------------------------------------------------------- /conf/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | defaults: 3 | - _self_ 4 | - task: train_ldm 5 | 6 | debug: false 7 | seed: -1 8 | 9 | hydra: 10 | run: 11 | dir: _outputs/${hydra.job.name} 12 | -------------------------------------------------------------------------------- /conf/task/eval_ldm_his.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_ldm_his 3 | 4 | name: ldm_his 5 | resume_pretrained: 6 | camera_resolution: 640x360 # W,H 7 | image_size: [180, 320] # H,W 8 | eval_dataset: [HISS] 9 | eval_num_batch: -1 10 | 
eval_batch_size: 4 11 | num_inference_timesteps: 10 12 | num_intermediate_images: 5 13 | num_inference_rounds: 1 14 | 15 | -------------------------------------------------------------------------------- /conf/task/eval_his_sim.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_his_sim 3 | 4 | resume_pretrained: 5 | camera_resolution: 224x126 # WxH 6 | image_size: [126, 224] # H,W 7 | safe_ssi: true 8 | eval_dataset: [HISS] 9 | eval_num_batch: -1 10 | eval_batch_size: 32 11 | sampler: my_ddpm 12 | num_inference_timesteps: 128 13 | num_intermediate_images: 8 14 | num_inference_rounds: 1 15 | write_pcd: true -------------------------------------------------------------------------------- /conf/task/eval_ldm_mixed.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_ldm_mixed 3 | 4 | # was the best version in real during the submission to CoRL 2024 5 | name: eval_ldm_sf 6 | resume_pretrained: 7 | camera_resolution: 480x270 # W,H 8 | image_size: [180,320] # H,W 9 | eval_dataset: [Real_xiaomeng_fxm] 10 | eval_num_batch: -1 11 | eval_batch_size: 4 12 | num_inference_timesteps: 10 13 | num_intermediate_images: 5 14 | num_inference_rounds: 1 15 | -------------------------------------------------------------------------------- /conf/task/eval_dreds_reprod.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_dreds_reprod 3 | 4 | name: dreds 5 | resume_pretrained: 6 | cond_channels: left+right+raw 7 | camera_resolution: 224x126 # WxH 8 | image_size: [126, 224] # H,W 9 | safe_ssi: true 10 | train_dataset: [Dreds] 11 | eval_dataset: [Dreds] 12 | eval_num_batch: -1 13 | eval_batch_size: 32 14 | save_model_epochs: 5 15 | num_inference_timesteps: 128 16 | num_intermediate_images: 8 17 | sampler: my_ddpm 18 | -------------------------------------------------------------------------------- /conf/task/eval_ldm_mixed_rgb+raw.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_ldm_mixed_rgb+raw 3 | 4 | # was the best version in real during the submission to CoRL 2024 5 | name: eval_ldm_mixed_rgb+raw 6 | resume_pretrained: 7 | camera_resolution: 480x270 # W,H 8 | image_size: [180,320] # H,W 9 | eval_dataset: [Real_xiaomeng_fxm] 10 | eval_num_batch: -1 11 | eval_batch_size: 4 12 | num_inference_timesteps: 10 13 | num_intermediate_images: 5 14 | num_inference_rounds: 1 15 | -------------------------------------------------------------------------------- /conf/task/eval_ldm_mixed_cond_rgbd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_ldm_mixed_cond_rgbd 3 | 4 | name: ldm_sf 5 | resume_pretrained: experiments/ldm_sf-0807.dep4.lr3e-05.v_prediction.nossi.scaled_linear.randn.ddpm1000.ClearPose_Dreds_HISS.240x320.rgb+raw.w0.0/best 6 | camera_resolution: 320x240 # WxH 7 | image_size: [240,320] # H,W 8 | eval_dataset: [ClearPose] 9 | eval_num_batch: -1 10 | sampler: ddim 11 | num_inference_timesteps: 10 12 | num_intermediate_images: 5 13 | num_inference_rounds: 1 -------------------------------------------------------------------------------- /conf/task/eval_clearpose.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_clearpose 3 | 4 | name: clearpose 5 | resume_pretrained:
experiments/clearpose-0809.dep1.lr1e-04.sample.ssi.squaredcos_cap_v2.pyramid.my_ddpm128.ClearPose_Dreds_HISS.240x320.rgb+raw.w0.0/best 6 | eval_num_batch: -1 7 | camera_resolution: 320x240 # WxH 8 | image_size: [240,320] # H,W 9 | eval_dataset: [ClearPose] 10 | num_intermediate_images: 8 11 | sampler: my_ddpm 12 | plot_error_map: false 13 | plot_denoised_images: false 14 | eval_batch_size: 96 15 | eval_split: "test" 16 | safe_ssi: false 17 | -------------------------------------------------------------------------------- /conf/task/eval_syntodd_rgbd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_syntodd_rgbd 3 | 4 | name: clearpose 5 | resume_pretrained: experiments/syntodd_rgbd-0810.dep1.lr1e-04.sample.ssi.squaredcos_cap_v2.pyramid.my_ddpm128.SynTODDRgbd.240x320.rgb+raw.w0.0/best 6 | eval_num_batch: -1 7 | camera_resolution: 320x240 # WxH 8 | image_size: [240,320] # H,W 9 | eval_dataset: [SynTODDRgbd] 10 | num_intermediate_images: 8 11 | sampler: my_ddpm 12 | plot_error_map: false 13 | plot_denoised_images: false 14 | eval_batch_size: 12 15 | eval_split: "test" 16 | safe_ssi: false 17 | -------------------------------------------------------------------------------- /conf/task/eval_sceneflow.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_sceneflow 3 | 4 | name: eval_sceneflow 5 | resume_pretrained: 6 | eval_dataset: [SceneFlow] 7 | eval_split: val 8 | camera_resolution: 960x540 9 | image_size: [540, 960] 10 | eval_num_batch: -1 11 | eval_batch_size: 3 12 | eval_output: "" # use default 13 | prediction_type: sample 14 | flow_guidance_mode: imputation 15 | flow_guidance_weights: [0] 16 | num_inference_rounds: 1 17 | num_inference_timesteps: 10 18 | num_intermediate_images: 5 19 | plot_denoised_images: true 20 | plot_intermediate_metrics: false 21 | write_pcd: false 22 | plot_error_map: true 23 | ensemble: false 24 | ssi: false -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "exclude": ["datasets", "experiments", "experiments.corl24", "checkpoint", "_outputs"], 3 | "reportPrivateImportUsage": false, 4 | "reportOptionalMemberAccess": false, 5 | "reportCallIssue": false, 6 | "reportPossiblyUnboundVariable": false, 7 | "reportArgumentType": false, 8 | "reportOptionalSubscript": false, 9 | "reportAttributeAccessIssue": false, 10 | "reportOptionalOperand": false, 11 | "reportIndexIssue": false, 12 | "reportAssignmentType": false, 13 | "reportOperatorIssue": false, 14 | "reportReturnType": false, 15 | "reportGeneralTypeIssues": false 16 | } -------------------------------------------------------------------------------- /conf/task/eval_ldm_mono.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_ldm_mono 3 | 4 | name: eval_ldm_mono 5 | resume_pretrained: experiments/ldm_mono-0809.dep4.lr3e-05.v_prediction.ssi.scaled_linear.randn.ddpm1000.SynTODD.240x320.rgb.w0.0/best 6 | eval_dataset: [SynTODD] 7 | eval_split: test 8 | # camera_resolution: 640x480 9 | # image_size: [480, 640] 10 | eval_num_batch: -1 11 | eval_batch_size: 16 12 | num_inference_rounds: 1 13 | num_inference_timesteps: 10 14 | num_intermediate_images: 5 15 | plot_denoised_images: false 16 | plot_error_map: true 17 | write_pcd: false 18 | # ensemble: false 19 | # safe_ssi: true 20 | # 
ransac_error_threshold: 0.6 # rmse error, 0.6 for nyu 21 | 22 | -------------------------------------------------------------------------------- /conf/task/eval_ldm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_ldm 3 | 4 | name: eval_ft_sd2_hypersim 5 | resume_pretrained: experiments/d.fixed.lr3e-05.v_prediction.ssi.scaled_linear.randn.ssi.my_ddpm1000.HyperSim.240x320.cond4.w0.0/epoch_0038 6 | # train_dataset: [HyperSim] 7 | eval_dataset: [NYUv2] 8 | eval_split: val 9 | camera_resolution: 640x480 10 | image_size: [480, 640] 11 | eval_num_batch: -1 12 | eval_batch_size: 3 13 | eval_output: "" # use default 14 | flow_guidance_mode: imputation 15 | flow_guidance_weights: [0] 16 | num_inference_rounds: 1 17 | num_inference_timesteps: 10 18 | num_intermediate_images: 5 19 | plot_denoised_images: true 20 | write_pcd: false 21 | plot_error_map: true 22 | ensemble: false 23 | # safe_ssi: true 24 | # ransac_error_threshold: 0.6 # rmse error, 0.6 for nyu 25 | 26 | -------------------------------------------------------------------------------- /conf/task/train_ldm_mixed.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: ldm_sf 5 | resume_pretrained: 6 | ldm: true 7 | depth_channels: 4 8 | divis_by: 8 9 | prediction_space: disp 10 | camera_resolution: 480x270 # W,H 11 | image_size: [180,320] # H,W 12 | train_dataset: [SceneFlow] 13 | eval_dataset: [SceneFlow] 14 | train_batch_size: 16 15 | gradient_accumulation_steps: 1 16 | eval_num_batch: -1 17 | eval_batch_size: 4 18 | lr_warmup_steps: 0 19 | learning_rate: 3e-5 20 | lr_scheduler: constant # linear: almost the same as constant 21 | val_every_global_steps: 1000 22 | save_model_epochs: 3 23 | num_train_timesteps: 1000 24 | num_inference_timesteps: 10 25 | num_intermediate_images: 5 26 | num_inference_rounds: 1 27 | ssi: false 28 | normalize_mode: average 29 | num_chs: 1 30 | ch_bounds: [128.] 31 | ch_gammas: [1.] 32 | noise_strategy: randn 33 | loss_type: mse 34 | prediction_type: v_prediction 35 | sampler: ddpm 36 | num_epochs: 200 37 | cond_channels: left+right+raw 38 | beta_schedule: scaled_linear 39 | beta_start: 0.00085 40 | beta_end: 0.012 41 | mixed_precision: "no" 42 | thresholding: false 43 | clip_sample: false 44 | block_out_channels: [0] # N/A 45 | -------------------------------------------------------------------------------- /conf/task/train_ldm_mono.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: ldm_mono 5 | resume_pretrained: 6 | ldm: true 7 | depth_channels: 4 8 | divis_by: 8 9 | prediction_space: depth 10 | camera_resolution: 320x240 # WxH 11 | image_size: [240,320] # H,W 12 | train_dataset: [SynTODD] 13 | eval_dataset: [SynTODD] 14 | dataset_weight: [1] 15 | train_batch_size: 12 16 | gradient_accumulation_steps: 1 17 | eval_num_batch: -1 18 | eval_batch_size: 4 19 | lr_warmup_steps: 5000 20 | learning_rate: 3e-5 21 | lr_scheduler: constant # linear: almost the same as constant 22 | val_every_global_steps: 1000 23 | save_model_epochs: 3 24 | num_train_timesteps: 1000 25 | num_inference_timesteps: 10 26 | num_intermediate_images: 5 27 | num_inference_rounds: 1 28 | ssi: true 29 | normalize_mode: average 30 | num_chs: 1 31 | ch_bounds: [1.] 32 | ch_gammas: [1.] 
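The task configs in `conf/task` compose through Hydra: each `eval_*` file inherits its `train_*` counterpart through its `defaults:` list, and `conf/config.yaml` selects the active task group. A minimal sketch of composing one of these configs programmatically, mirroring how `isaacsim/render.py` loads its config (the chosen task and override values here are illustrative):

```python
from hydra import compose, initialize

# Compose conf/config.yaml, swap in a task from conf/task, and override
# individual fields -- the same pattern used by isaacsim/render.py.
with initialize(version_base=None, config_path="conf"):
    cfg = compose(
        config_name="config",
        overrides=["task=eval_ldm_mixed", "task.eval_batch_size=2"],
    )
    print(cfg.task.name, cfg.task.image_size)
```

The same `key=value` overrides can be passed on the command line to the repo's Hydra entry points.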
33 | noise_strategy: randn 34 | loss_type: mse 35 | prediction_type: v_prediction 36 | sampler: ddpm 37 | num_epochs: 200 38 | cond_channels: rgb 39 | beta_schedule: scaled_linear 40 | beta_start: 0.00085 41 | beta_end: 0.012 42 | mixed_precision: "no" 43 | thresholding: false 44 | clip_sample: false 45 | block_out_channels: [0] # N/A 46 | -------------------------------------------------------------------------------- /conf/task/train_sceneflow.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: sceneflow 5 | ldm: false 6 | prediction_space: disp 7 | ssi: false 8 | normalize_mode: average 9 | ch_bounds: [128] 10 | ch_gammas: [1.0] 11 | resume_pretrained: 12 | camera_resolution: 480x270 #960x540 # W,H 13 | image_size: [270, 480] # H,W 14 | train_dataset: [SceneFlow] 15 | eval_dataset: [SceneFlow] 16 | train_batch_size: 4 17 | eval_num_batch: -1 18 | eval_batch_size: 8 19 | lr_warmup_steps: 1000 20 | learning_rate: 1e-4 21 | lr_scheduler: linear 22 | gradient_accumulation_steps: 1 23 | val_every_global_steps: 2000 24 | save_model_epochs: 5 25 | num_train_timesteps: 128 26 | num_inference_timesteps: 10 27 | num_intermediate_images: 5 28 | num_inference_rounds: 1 29 | block_out_channels: [128, 128, 256, 256, 512, 512] 30 | noise_strategy: pyramid 31 | loss_type: l1 32 | prediction_type: sample 33 | num_epochs: 600 34 | cond_channels: left+right+raw 35 | depth_channels: 3 36 | beta_schedule: squaredcos_cap_v2 37 | beta_start: 1e-4 38 | beta_end: 2e-2 39 | sampler: my_ddpm 40 | mixed_precision: "no" 41 | thresholding: true 42 | dynamic_thresholding_ratio: 0.995 43 | clip_sample: true 44 | clip_sample_range: 1.0 -------------------------------------------------------------------------------- /conf/task/train_hiss.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: hiss 5 | ldm: false 6 | prediction_space: disp 7 | resume_pretrained: 8 | cond_channels: left+right+raw 9 | camera_resolution: 224x126 # WxH 10 | image_size: [126, 224] # H,W 11 | ssi: true 12 | safe_ssi: true 13 | train_dataset: [HISS] 14 | eval_dataset: [HISS] 15 | normalize_mode: average 16 | ch_bounds: [64.] 17 | ch_gammas: [1.] 
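`train_hiss.yaml` and the other pixel-space task configs in this group set `noise_strategy: pyramid`. The repo's implementation is not included in this section; the following is a hedged sketch of the multi-resolution ("pyramid") noise commonly meant by that name, where Gaussian noise drawn at several scales is upsampled, summed, and re-standardized (the discount factor and level count here are assumptions):

```python
import torch
import torch.nn.functional as F

def pyramid_noise_like(x: torch.Tensor, discount: float = 0.9) -> torch.Tensor:
    """Multi-scale Gaussian noise with the same (B, C, H, W) shape as x."""
    b, c, h, w = x.shape
    noise = torch.randn_like(x)
    for i in range(1, 6):
        r = 2 ** i  # progressively coarser grids
        if h // r < 1 or w // r < 1:
            break
        coarse = torch.randn(b, c, h // r, w // r, device=x.device)
        noise += discount ** i * F.interpolate(
            coarse, size=(h, w), mode="bilinear", align_corners=False)
    return noise / noise.std()  # restore (approximately) unit variance
```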
18 | num_chs: 1 19 | norm_s: 2 20 | norm_t: 0.5 21 | train_batch_size: 32 22 | eval_num_batch: -1 23 | eval_batch_size: 32 24 | lr_warmup_steps: 1000 25 | learning_rate: 0.0001 26 | lr_scheduler: constant 27 | gradient_accumulation_steps: 1 28 | val_every_global_steps: 5000 29 | save_model_epochs: 5 30 | num_train_timesteps: 128 31 | num_inference_timesteps: 8 32 | num_intermediate_images: 4 33 | num_inference_rounds: 1 34 | block_out_channels: [128, 128, 256, 256, 512, 512] 35 | noise_strategy: pyramid 36 | loss_type: mse 37 | prediction_type: sample 38 | num_epochs: 200 39 | depth_channels: 1 40 | beta_schedule: squaredcos_cap_v2 41 | beta_start: 0.0001 42 | beta_end: 0.02 43 | sampler: my_ddpm 44 | mixed_precision: "no" 45 | thresholding: true 46 | dynamic_thresholding_ratio: 0.995 47 | clip_sample: true 48 | clip_sample_range: 1.0 -------------------------------------------------------------------------------- /conf/task/train_ldm_mixed_rgb+raw.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: ldm_sf 5 | resume_pretrained: 6 | ldm: true 7 | depth_channels: 4 8 | divis_by: 8 9 | prediction_space: disp 10 | camera_resolution: 480x270 # W,H 11 | image_size: [180,320] # H,W 12 | train_dataset: [Dreds, HISS, ClearPose] 13 | dataset_weight: [1,1,1] 14 | eval_dataset: [Dreds, HISS, Real_xiaomeng_fxm] 15 | train_batch_size: 16 16 | gradient_accumulation_steps: 1 17 | eval_num_batch: -1 18 | eval_batch_size: 4 19 | lr_warmup_steps: 0 20 | learning_rate: 3e-5 21 | lr_scheduler: constant # linear: almost the same as constant 22 | val_every_global_steps: 1000 23 | save_model_epochs: 3 24 | num_train_timesteps: 1000 25 | num_inference_timesteps: 10 26 | num_intermediate_images: 5 27 | num_inference_rounds: 1 28 | ssi: false 29 | normalize_mode: average 30 | num_chs: 1 31 | ch_bounds: [128.] 32 | ch_gammas: [1.] 33 | noise_strategy: randn 34 | loss_type: mse 35 | prediction_type: v_prediction 36 | sampler: ddpm 37 | num_epochs: 200 38 | cond_channels: rgb+raw 39 | beta_schedule: scaled_linear 40 | beta_start: 0.00085 41 | beta_end: 0.012 42 | mixed_precision: "no" 43 | thresholding: false 44 | clip_sample: false 45 | block_out_channels: [0] # N/A 46 | -------------------------------------------------------------------------------- /conf/task/train_dreds_reprod.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: dreds 5 | ldm: false 6 | prediction_space: disp 7 | resume_pretrained: 8 | cond_channels: left+right+raw 9 | camera_resolution: 224x126 # WxH 10 | image_size: [126, 224] # H,W 11 | ssi: true 12 | safe_ssi: true 13 | train_dataset: [Dreds] 14 | eval_dataset: [Dreds] 15 | normalize_mode: average 16 | ch_bounds: [64.] 17 | ch_gammas: [1.] 
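`beta_schedule: squaredcos_cap_v2` together with `num_train_timesteps: 128` in these configs names the cosine noise schedule of Nichol & Dhariwal (2021), as implemented in diffusers. For reference, a self-contained sketch of how those betas are derived from the cumulative signal level `alpha_bar(t)`:

```python
import math

def squaredcos_cap_v2_betas(num_timesteps: int = 128, max_beta: float = 0.999):
    """beta_t = 1 - alpha_bar((t+1)/T) / alpha_bar(t/T), capped at max_beta."""
    def alpha_bar(t: float) -> float:
        return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
    return [
        min(1 - alpha_bar((i + 1) / num_timesteps) / alpha_bar(i / num_timesteps), max_beta)
        for i in range(num_timesteps)
    ]
```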
18 | num_chs: 1 19 | norm_s: 2 20 | norm_t: 0.5 21 | train_batch_size: 32 22 | eval_num_batch: -1 23 | eval_batch_size: 32 24 | lr_warmup_steps: 1000 25 | learning_rate: 0.0001 26 | lr_scheduler: constant 27 | gradient_accumulation_steps: 1 28 | val_every_global_steps: 5000 29 | save_model_epochs: 5 30 | num_train_timesteps: 128 31 | num_inference_timesteps: 8 32 | num_intermediate_images: 4 33 | num_inference_rounds: 1 34 | block_out_channels: [128, 128, 256, 256, 512, 512] 35 | noise_strategy: pyramid 36 | loss_type: mse 37 | prediction_type: sample 38 | num_epochs: 200 39 | depth_channels: 1 40 | beta_schedule: squaredcos_cap_v2 41 | beta_start: 0.0001 42 | beta_end: 0.02 43 | sampler: my_ddpm 44 | mixed_precision: "no" 45 | thresholding: true 46 | dynamic_thresholding_ratio: 0.995 47 | clip_sample: true 48 | clip_sample_range: 1.0 -------------------------------------------------------------------------------- /conf/task/train_ldm_mixed_left+right+raw.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: ldm_sf 5 | resume_pretrained: 6 | ldm: true 7 | depth_channels: 4 8 | divis_by: 8 9 | prediction_space: disp 10 | camera_resolution: 480x270 # W,H 11 | image_size: [180,320] # H,W 12 | train_dataset: [SceneFlow, Dreds, HISS] 13 | dataset_weight: [1,1,1] 14 | eval_dataset: [SceneFlow, Dreds, HISS, Real_xiaomeng_fxm] 15 | train_batch_size: 16 16 | gradient_accumulation_steps: 1 17 | eval_num_batch: -1 18 | eval_batch_size: 4 19 | lr_warmup_steps: 0 20 | learning_rate: 3e-5 21 | lr_scheduler: constant # linear: almost the same as constant 22 | val_every_global_steps: 1000 23 | save_model_epochs: 3 24 | num_train_timesteps: 1000 25 | num_inference_timesteps: 10 26 | num_intermediate_images: 5 27 | num_inference_rounds: 1 28 | ssi: false 29 | normalize_mode: average 30 | num_chs: 1 31 | ch_bounds: [128.] 32 | ch_gammas: [1.] 33 | noise_strategy: randn 34 | loss_type: mse 35 | prediction_type: v_prediction 36 | sampler: ddpm 37 | num_epochs: 200 38 | cond_channels: left+right+raw 39 | beta_schedule: scaled_linear 40 | beta_start: 0.00085 41 | beta_end: 0.012 42 | mixed_precision: "no" 43 | thresholding: false 44 | clip_sample: false 45 | block_out_channels: [0] # N/A 46 | -------------------------------------------------------------------------------- /conf/task/train_ldm_mixed_cond_rgbd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: ldm_sf 5 | resume_pretrained: 6 | ldm: true 7 | depth_channels: 4 8 | divis_by: 8 9 | prediction_space: disp 10 | camera_resolution: 320x240 # WxH 11 | image_size: [240,320] # H,W 12 | train_dataset: [ClearPose, Dreds, HISS] # [Dreds] # 13 | eval_dataset: [ClearPose, Dreds, HISS] # [Dreds] # 14 | dataset_weight: [1, 1, 1] # [1] # 15 | train_batch_size: 16 16 | gradient_accumulation_steps: 1 17 | eval_num_batch: -1 18 | eval_batch_size: 4 19 | lr_warmup_steps: 5000 20 | learning_rate: 3e-5 21 | lr_scheduler: constant # linear: almost the same as constant 22 | val_every_global_steps: 1000 23 | save_model_epochs: 3 24 | num_train_timesteps: 1000 25 | num_inference_timesteps: 10 26 | num_intermediate_images: 5 27 | num_inference_rounds: 1 28 | ssi: false 29 | normalize_mode: average 30 | num_chs: 1 31 | ch_bounds: [64.0] 32 | ch_gammas: [1.]
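The LDM task configs in this group train with `prediction_type: v_prediction` (Salimans & Ho, 2022): the network predicts `v = sqrt(alpha_bar_t) * noise - sqrt(1 - alpha_bar_t) * x0` rather than the noise or the clean sample. A minimal sketch (not repo code) of recovering the clean sample from a noisy one and a predicted `v`; substituting `x_t = sqrt(alpha_bar_t) * x0 + sqrt(1 - alpha_bar_t) * noise` confirms the identity, since the noise terms cancel:

```python
import torch

def x0_from_v(x_t: torch.Tensor, v: torch.Tensor,
              alphas_cumprod: torch.Tensor, t: int) -> torch.Tensor:
    """Invert v-prediction: x0 = sqrt(a_bar_t) * x_t - sqrt(1 - a_bar_t) * v."""
    a_bar = alphas_cumprod[t]
    return a_bar.sqrt() * x_t - (1.0 - a_bar).sqrt() * v
```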
33 | noise_strategy: randn 34 | loss_type: mse 35 | prediction_type: v_prediction 36 | sampler: ddpm 37 | num_epochs: 200 38 | cond_channels: rgb+raw 39 | beta_schedule: scaled_linear 40 | beta_start: 0.00085 41 | beta_end: 0.012 42 | mixed_precision: "no" 43 | thresholding: false 44 | clip_sample: false 45 | block_out_channels: [0] # N/A 46 | -------------------------------------------------------------------------------- /conf/task/train_clearpose.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: clearpose 5 | ldm: false 6 | prediction_space: disp 7 | resume_pretrained: 8 | cond_channels: rgb+raw 9 | camera_resolution: 320x240 # WxH 10 | image_size: [240, 320] # H,W 11 | ssi: true 12 | safe_ssi: false 13 | train_dataset: [ClearPose, Dreds, HISS] # [Dreds] # 14 | eval_dataset: [ClearPose, Dreds, HISS] # [Dreds] # 15 | dataset_weight: [1, 1, 1] # [1] # 16 | normalize_mode: average 17 | ch_bounds: [64.] 18 | ch_gammas: [1.] 19 | num_chs: 1 20 | norm_s: 2 21 | norm_t: 0.5 22 | train_batch_size: 12 # 32 works for 224x126 23 | eval_num_batch: -1 24 | eval_batch_size: 32 25 | lr_warmup_steps: 5000 26 | learning_rate: 0.0001 27 | lr_scheduler: constant 28 | gradient_accumulation_steps: 1 29 | val_every_global_steps: 5000 30 | save_model_epochs: 5 31 | num_train_timesteps: 128 32 | num_inference_timesteps: 8 33 | num_intermediate_images: 4 34 | num_inference_rounds: 1 35 | block_out_channels: [128, 128, 256, 256, 512, 512] 36 | noise_strategy: pyramid 37 | loss_type: mse 38 | prediction_type: sample 39 | num_epochs: 200 40 | depth_channels: 1 41 | beta_schedule: squaredcos_cap_v2 42 | beta_start: 0.0001 43 | beta_end: 0.02 44 | sampler: my_ddpm 45 | mixed_precision: "no" 46 | thresholding: true 47 | dynamic_thresholding_ratio: 0.995 48 | clip_sample: true 49 | clip_sample_range: 1.0 -------------------------------------------------------------------------------- /conf/task/train_ldm_mixed_gapartnet.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: ldm_mixed_gapartnet 5 | resume_pretrained: 6 | ldm: true 7 | depth_channels: 4 8 | divis_by: 8 9 | prediction_space: disp 10 | camera_resolution: 320x180 # W,H 11 | # camera_resolution: 480x270 # W,H 12 | image_size: [180,320] # H,W 13 | # image_size: [270,480] # H,W 14 | train_dataset: [SceneFlow, Dreds, HISS, Gapartnet2] 15 | eval_dataset: [SceneFlow, Dreds, HISS, Gapartnet2, Real] 16 | dataset_weight: [1, 1, 1, 1] 17 | train_batch_size: 16 18 | gradient_accumulation_steps: 1 19 | eval_num_batch: 10 20 | eval_batch_size: 4 21 | lr_warmup_steps: 0 22 | learning_rate: 3e-5 23 | lr_scheduler: constant # linear: almost the same as constant 24 | val_every_global_steps: 1000 25 | save_model_epochs: 3 26 | num_train_timesteps: 1000 27 | num_inference_timesteps: 10 28 | num_intermediate_images: 5 29 | num_inference_rounds: 1 30 | ssi: false 31 | normalize_mode: average 32 | num_chs: 1 33 | ch_bounds: [128.] 34 | ch_gammas: [1.] 
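The stereo configs in this group operate in disparity space (`prediction_space: disp`) with `left+right+raw` conditioning. Converting a predicted disparity map back to metric depth uses the rig geometry, `depth = focal * baseline / disparity`. A minimal illustrative helper (not from the repo; the 0.055 m default matches the IR-camera baseline configured in `isaacsim/config/hssd.yaml`):

```python
import numpy as np

def disparity_to_depth(disp_px: np.ndarray, focal_px: float,
                       baseline_m: float = 0.055, eps: float = 1e-6) -> np.ndarray:
    """depth = f * b / d; zero or negative disparities map to depth 0."""
    depth = np.zeros_like(disp_px, dtype=np.float32)
    valid = disp_px > eps
    depth[valid] = focal_px * baseline_m / disp_px[valid]
    return depth
```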
35 | noise_strategy: randn 36 | loss_type: mse 37 | prediction_type: v_prediction 38 | sampler: ddpm 39 | num_epochs: 200 40 | cond_channels: left+right+raw 41 | beta_schedule: scaled_linear 42 | beta_start: 0.00085 43 | beta_end: 0.012 44 | mixed_precision: "no" 45 | thresholding: false 46 | clip_sample: false 47 | block_out_channels: [0] # N/A 48 | -------------------------------------------------------------------------------- /conf/task/train_syntodd_rgbd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: syntodd_rgbd 5 | ldm: false 6 | prediction_space: disp 7 | resume_pretrained: 8 | cond_channels: rgb+raw 9 | camera_resolution: 320x240 # WxH 10 | image_size: [240, 320] # H,W 11 | ssi: true 12 | safe_ssi: false 13 | train_dataset: [SynTODDRgbd] # 14 | eval_dataset: [SynTODDRgbd] # 15 | dataset_variant: simdepth # "simdepth", "erodedepth", "dilatedepth" 16 | dataset_weight: [1] # [1] # 17 | normalize_mode: average 18 | ch_bounds: [64.] 19 | ch_gammas: [1.] 20 | num_chs: 1 21 | norm_s: 2 22 | norm_t: 0.5 23 | train_batch_size: 12 # 32 works for 224x126 24 | eval_num_batch: -1 25 | eval_batch_size: 32 26 | lr_warmup_steps: 5000 27 | learning_rate: 0.0001 28 | lr_scheduler: constant 29 | gradient_accumulation_steps: 1 30 | val_every_global_steps: 5000 31 | save_model_epochs: 5 32 | num_train_timesteps: 128 33 | num_inference_timesteps: 8 34 | num_intermediate_images: 4 35 | num_inference_rounds: 1 36 | block_out_channels: [128, 128, 256, 256, 512, 512] 37 | noise_strategy: pyramid 38 | loss_type: mse 39 | prediction_type: sample 40 | num_epochs: 200 41 | depth_channels: 1 42 | beta_schedule: squaredcos_cap_v2 43 | beta_start: 0.0001 44 | beta_end: 0.02 45 | sampler: my_ddpm 46 | mixed_precision: "no" 47 | thresholding: true 48 | dynamic_thresholding_ratio: 0.995 49 | clip_sample: true 50 | clip_sample_range: 1.0 -------------------------------------------------------------------------------- /isaacsim/render.py: -------------------------------------------------------------------------------- 1 | """Generate infrared rendering using replicator 2 | """ 3 | import json 4 | import math 5 | import os 6 | import random 7 | import sys 8 | 9 | import carb 10 | import yaml 11 | from omni.isaac.kit import SimulationApp 12 | 13 | from omegaconf import DictConfig, OmegaConf 14 | from hydra import compose, initialize 15 | import hydra 16 | 17 | # hydra: load config 18 | with initialize(version_base=None, config_path="config", job_name="replicator_ir"): 19 | cfg = compose(config_name="hssd.yaml" , overrides=sys.argv[1:]) 20 | 21 | if cfg["seed"] >= 0: 22 | random.seed(cfg["seed"]) 23 | 24 | # start simulation 25 | _app = SimulationApp(launch_config=cfg['launch_config']) 26 | _Log = _app.app.print_and_log 27 | 28 | from omni.isaac.core import World 29 | from replicator import IRReplicator 30 | 31 | # main program 32 | def run(cfg: DictConfig) -> None: 33 | _Log("start running") 34 | _world = World(set_defaults=True) #**cfg['world'], 35 | _world.set_simulation_dt(**cfg["world"]) 36 | 37 | # start replicator 38 | rep = IRReplicator(_app, _world, cfg) 39 | rep.start() 40 | 41 | _Log("keep GUI running if headless is False") 42 | while _app.is_running() and not cfg['launch_config']['headless']: 43 | _world.step(render=True) 44 | 45 | _app.close() 46 | 47 | if __name__ == "__main__": 48 | run(cfg) 49 | -------------------------------------------------------------------------------- /isaacsim/README.md: 
-------------------------------------------------------------------------------- 1 | 2 | ## Data Generation in Simulation 3 | 4 | Although we do not plan to release all the source code for generating the `HISS` dataset, I want to share example code for generating IR renderings using [IsaacSim 4.0.0](https://docs.isaacsim.omniverse.nvidia.com/4.0.0/installation/install_container.html). 5 | 6 | > This code should also work on newer versions of Isaac Sim with very few changes. If you encounter any problems, please feel free to contact me. 7 | 8 | 9 | ### 1. Prepare data 10 | 11 | + Download [HSSD scenes](https://huggingface.co/datasets/hssd/hssd-scenes) 12 | 13 | Note that the HSSD scenes are very large; downloading just a few of them is enough. 14 | 15 | E.g., I set [107734119_175999932](https://huggingface.co/datasets/hssd/hssd-scenes/blob/main/scenes/107734119_175999932.glb) as the default scene in `config/hssd.yaml`. 16 | 17 | Please first convert it to a USD file using [USD Composer](https://docs.omniverse.nvidia.com/composer/latest/index.html). 18 | 19 | + Download the object CAD models from DREDS: [link](https://mirrors.pku.edu.cn/dl-release/DREDS_ECCV2022/data/cad_model/) 20 | 21 | + Download NVIDIA Omniverse [vMaterials_2](https://developer.nvidia.com/vmaterials) 22 | 23 | 24 | Put them all in the `data` folder; example folder structure: 25 | 26 | ``` 27 | data 28 | ├── dreds 29 | │ ├── cad_model 30 | │ │ ├── 00000000 31 | │ │ ├── 02691156 32 | │ │ ├── 02876657 33 | │ │ ├── 02880940 34 | │ │ ├── 02942699 35 | │ │ ├── 02946921 36 | │ │ ├── 02954340 37 | │ │ ├── 02958343 38 | │ │ ├── 02992529 39 | │ │ └── 03797390 40 | │ └── output 41 | ├── hssd 42 | │ └── scenes 43 | │ └── 107734119_175999932 44 | └── vMaterials_2 45 | ├── Carpet 46 | ..... 47 | ``` 48 | 49 | ### 2. Start the Isaac Sim 4.0.0 container 50 | 51 | Change the project directory to yours and start the Isaac Sim container: 52 | 53 | ``` 54 | docker run --name isaac-sim --entrypoint bash -it --runtime=nvidia --gpus all -e "ACCEPT_EULA=Y" --rm --network=host \ 55 | -e "PRIVACY_CONSENT=Y" \ 56 | -v ~/workspace/projects/d3roma/isaacsim:/root/d3roma:rw \ 57 | -v ~/docker/isaac-sim/cache/kit:/isaac-sim/kit/cache:rw \ 58 | -v ~/docker/isaac-sim/cache/ov:/root/.cache/ov:rw \ 59 | -v ~/docker/isaac-sim/cache/pip:/root/.cache/pip:rw \ 60 | -v ~/docker/isaac-sim/cache/glcache:/root/.cache/nvidia/GLCache:rw \ 61 | -v ~/docker/isaac-sim/cache/computecache:/root/.nv/ComputeCache:rw \ 62 | -v ~/docker/isaac-sim/logs:/root/.nvidia-omniverse/logs:rw \ 63 | -v ~/docker/isaac-sim/data:/root/.local/share/ov/data:rw \ 64 | -v ~/docker/isaac-sim/documents:/root/Documents:rw \ 65 | nvcr.io/nvidia/isaac-sim:4.0.0 66 | ``` 67 | 68 | ### 3. Install Python packages into Isaac Sim 69 | 70 | ``` 71 | /isaac-sim/python.sh -m pip install -r requirements.txt 72 | ``` 73 | 74 | ### 4. Generate IR renderings 75 | ``` 76 | cd /root/d3roma 77 | /isaac-sim/python.sh render.py 78 | ``` 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /utils/losess.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helpers for various likelihood-based losses. These are ported from the original 3 | Ho et al.
diffusion models codebase: 4 | https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/utils.py 5 | """ 6 | 7 | import numpy as np 8 | import torch as th 9 | 10 | def mse_to_vlb(t, mse, logvar_clipped): 11 | """ t: bs 12 | mse: bs 13 | """ 14 | if t == 0: 15 | return discretized_gaussian_log_likelihood() 16 | else: 17 | return 0.5 * ( 18 | # -1.0 19 | # + logvar2 20 | # - logvar1 21 | # + th.exp(logvar1 - logvar2) 22 | + mse * th.exp(-logvar_clipped[t]) / np.log(2.0) 23 | ) 24 | 25 | def normal_kl(mean1, logvar1, mean2, logvar2): 26 | """ 27 | Compute the KL divergence between two gaussians. 28 | 29 | Shapes are automatically broadcasted, so batches can be compared to 30 | scalars, among other use cases. 31 | """ 32 | tensor = None 33 | for obj in (mean1, logvar1, mean2, logvar2): 34 | if isinstance(obj, th.Tensor): 35 | tensor = obj 36 | break 37 | assert tensor is not None, "at least one argument must be a Tensor" 38 | 39 | # Force variances to be Tensors. Broadcasting helps convert scalars to 40 | # Tensors, but it does not work for th.exp(). 41 | logvar1, logvar2 = [ 42 | x if isinstance(x, th.Tensor) else th.tensor(x).to(tensor) 43 | for x in (logvar1, logvar2) 44 | ] 45 | 46 | return 0.5 * ( 47 | -1.0 48 | + logvar2 49 | - logvar1 50 | + th.exp(logvar1 - logvar2) 51 | + ((mean1 - mean2) ** 2) * th.exp(-logvar2) 52 | ) 53 | 54 | 55 | def approx_standard_normal_cdf(x): 56 | """ 57 | A fast approximation of the cumulative distribution function of the 58 | standard normal. 59 | """ 60 | return 0.5 * (1.0 + th.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * th.pow(x, 3)))) 61 | 62 | 63 | def discretized_gaussian_log_likelihood(x, *, means, log_scales): 64 | """ 65 | Compute the log-likelihood of a Gaussian distribution discretizing to a 66 | given image. 67 | 68 | :param x: the target images. It is assumed that this was uint8 values, 69 | rescaled to the range [-1, 1]. 70 | :param means: the Gaussian mean Tensor. 71 | :param log_scales: the Gaussian log stddev Tensor. 72 | :return: a tensor like x of log probabilities (in nats). 
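Example (illustrative): with x, means and log_scales all zeros of shape (2, 3), each returned entry is the log of the standard-normal probability mass on the bin [x - 1/255, x + 1/255]:
    lp = discretized_gaussian_log_likelihood(th.zeros(2, 3), means=th.zeros(2, 3), log_scales=th.zeros(2, 3))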
73 | """ 74 | assert x.shape == means.shape == log_scales.shape 75 | centered_x = x - means 76 | inv_stdv = th.exp(-log_scales) 77 | plus_in = inv_stdv * (centered_x + 1.0 / 255.0) 78 | cdf_plus = approx_standard_normal_cdf(plus_in) 79 | min_in = inv_stdv * (centered_x - 1.0 / 255.0) 80 | cdf_min = approx_standard_normal_cdf(min_in) 81 | log_cdf_plus = th.log(cdf_plus.clamp(min=1e-12)) 82 | log_one_minus_cdf_min = th.log((1.0 - cdf_min).clamp(min=1e-12)) 83 | cdf_delta = cdf_plus - cdf_min 84 | log_probs = th.where( 85 | x < -0.999, 86 | log_cdf_plus, 87 | th.where(x > 0.999, log_one_minus_cdf_min, th.log(cdf_delta.clamp(min=1e-12))), 88 | ) 89 | assert log_probs.shape == x.shape 90 | return log_probs 91 | -------------------------------------------------------------------------------- /scripts/check_sceneflow.py: -------------------------------------------------------------------------------- 1 | import hydra 2 | from omegaconf import DictConfig, OmegaConf 3 | from hydra.core.config_store import ConfigStore 4 | from config import Config, TrainingConfig, setup_hydra_configurations 5 | from data.data_loader import fetch_dataloader 6 | from utils.utils import seed_everything 7 | from accelerate import Accelerator 8 | from accelerate.logging import get_logger 9 | from tqdm import tqdm 10 | from utils.utils import Normalizer 11 | from utils.frame_utils import read_gen 12 | import torch.nn.functional as F 13 | import shutil 14 | 15 | import torch 16 | import numpy as np 17 | from PIL import Image 18 | 19 | import os 20 | logger = get_logger(__name__, log_level="INFO") # multi-process logging 21 | 22 | Accelerator() # hack: enable logging 23 | 24 | @hydra.main(version_base=None, config_path="conf", config_name="config.yaml") 25 | def check(config: Config): 26 | cfg = config.task 27 | logger.info(cfg.train_dataset) 28 | 29 | train_dataloader, val_dataloader_lst = fetch_dataloader(cfg) 30 | logger.info(val_dataloader_lst[0].dataset.__class__.__name__) 31 | 32 | all_dataloaders = [train_dataloader] 33 | all_dataloaders.extend(val_dataloader_lst) 34 | 35 | count = 0 36 | bads = {} 37 | 38 | for i, dataloader in enumerate([train_dataloader]): # all_dataloaders, val_dataloader_lst 39 | pbar = tqdm(total=len(dataloader)) 40 | for j, data in enumerate(dataloader): 41 | # print(data.keys()) 42 | B = data['mask'].shape[0] 43 | for b in range(B): 44 | # rgb = data['normalized_rgb'][b] 45 | index = data['index'][b] 46 | path = data['path'][b] 47 | 48 | raw_left = path.replace("disparity", "raw_cleanpass").replace("pfm", "png").replace("right", "left") 49 | # raw_right= path.replace("disparity", "raw_finalpass").replace("pfm", "png").replace("left", "right") 50 | 51 | raw_left = np.array(read_gen(raw_left)) 52 | gt_left = np.array(read_gen(path)) 53 | 54 | TP = ((raw_left > 0) & (np.abs(gt_left - raw_left) <= 2)).sum() 55 | FP = ((raw_left > 0) & (np.abs(gt_left - raw_left) > 2)).sum() 56 | FN = ((raw_left == 0) & (np.abs(gt_left - raw_left) <= 2)).sum() 57 | precision = TP / (TP + FP) 58 | recall = TP / (TP + FN) # biased 59 | 60 | # raw_right = read_gen(raw_right) 61 | 62 | # if precision < 0.6 and recall < 0.7: 63 | if precision < 0.2: 64 | bads[path] = precision 65 | logger.info(f"bad image {index}: {path}") 66 | 67 | if True: 68 | dump_dir = "./bad_sim" 69 | shutil.copy2(path, f"{dump_dir}/{j}_{b}_disp.pfm") 70 | shutil.copy2(path.replace("disparity", "raw_finalpass").replace("pfm", "png"), f"{dump_dir}/{j}_{b}_raw.png") 71 | shutil.copy2(path.replace("disparity", "raw_cleanpass").replace("pfm", "png"), 
f"{dump_dir}/{j}_{b}_raw_clean.png") 72 | shutil.copy2(path.replace("disparity", "frames_finalpass").replace("pfm", "png"), f"{dump_dir}/{j}_{b}_left.png") 73 | shutil.copy2(path.replace("disparity", "frames_finalpass").replace("pfm", "png").replace("left", "right"), f"{dump_dir}/{j}_{b}_right.png") 74 | 75 | count += 1 76 | 77 | pbar.update(1) 78 | 79 | logger.info(f"how many bad images? {len(bads.items())}") 80 | with open(f'bad_his.txt', 'w') as f: 81 | for path,epe in bads.items(): 82 | f.write(f"{path} {epe}\n") 83 | 84 | if __name__ == "__main__": 85 | seed_everything(0) 86 | setup_hydra_configurations() 87 | check() -------------------------------------------------------------------------------- /isaacsim/config/hssd.yaml: -------------------------------------------------------------------------------- 1 | launch_config: 2 | renderer: PathTracing #RayTracedLighting # 3 | headless: true # false # 4 | 5 | # Controls lightings for rendering images, 6 | # rgb: color image only 7 | # ir: ir depth image only 8 | # rgb+ir: iteratively render rgb and ir images 9 | # na: don't render images with replicators 10 | render_mode: rgb+ir # gt+rgb+ir # rgb+ir # rgb # ir # 11 | 12 | # Controls the simulation mode 13 | # layout_n_capture: init scene and capture images then quit 14 | # load_n_render: TODO load scene and render images 15 | # simulate: normal simulation mode 16 | 17 | sim_mode: load_n_render # layout_n_capture # simulate # 18 | 19 | resume_scene: 20 | 21 | robot: 22 | name: "franka.yml" #"galbot_zero_lefthand.yml" # 23 | init_pose: [-0.2, 0., 0., 1, 0, 0, 0] #[0.0, 0.5, 0.0] # usually look at, , 0.707, 0.0, 0.0, -0.707 24 | 25 | scene: empty #hssd # 26 | layout: part # dreds # graspnet # 27 | 28 | dreds: 29 | cad_model_dir: data/dreds 30 | layout_offset: [0.2, 0.0, 0.0] 31 | 32 | graspnet: 33 | root_path: data/graspnet 34 | layout_offset: [0.5, 0.2, 0.0] 35 | 36 | hssd: 37 | data_dir: data/hssd/scenes 38 | name: "107734119_175999932" 39 | default_prim_path: "/World/scene" 40 | scale: 1 41 | hide_ceilings: true 42 | hide_walls: false 43 | center_offset: [0.0, 0.0, 0.0] # [0.0, 0.0, 0.0] 44 | surface: 45 | category: teatable 46 | prim_path: /World/furniture/node_b914fb6bcc81386bfa1ff7a3eb8412b7ac581ff 47 | stt: false # specular or transparent, translucent surface 48 | 49 | seed: -1 # set to >= 0 to disable domain randomization 50 | rt_subframes: 8 51 | num_frames_per_surface: 3 52 | visualize: false 53 | render_after_quiet: true 54 | shadow: off 55 | 56 | viewport: 57 | record: false 58 | 59 | world: 60 | physics_dt: 0.016666667 # 0.01 # 61 | rendering_dt: 0.016666667 #0.005 # 62 | 63 | depth_sensor: 64 | name: realsense 65 | clipping_range: [0.1, 5] 66 | focal_length: 1.88 67 | # horizontal_aperture: 26.42033 68 | # vertical_aperture: 14.86144 69 | fov: 71.28 70 | resolution: [640, 360] # [1280, 720] # 71 | placement: # baseline = 0.055 72 | rgb_to_left_ir: 0.0 # 0.015 # 73 | rgb_to_right_ir: 0.055 # 0.070 # 74 | rgb_to_projector: 0.0410 # 0.0425 # 75 | projector: 76 | intensity: 5 77 | exposure: -1.0 78 | 79 | replicator: std_obj # graspnet # glass, articulated_obj 80 | domain_randomization: true 81 | 82 | lighting: 83 | light_type: [Sphere] # Rect # Disk # disk_light # 84 | range: #@see https://zh.wikipedia.org/zh-cn/%E7%90%83%E5%BA%A7%E6%A8%99%E7%B3%BB 85 | theta: [30, 90] 86 | phi: [-60, 60] 87 | radius: [1, 2] 88 | 89 | Distant_light: 90 | intensity: 0 91 | 92 | Sphere_light: 93 | radius: [1, 1] #[0.5, 1.0] 94 | height: [2.5, 2.5] #[1.5, 2] 95 | intensity: 96 | "on": [10000, 10000] 
# [7500, 11000] 97 | "off": [500, 500] # [200, 400] 98 | treatAsPoint: true 99 | 100 | Disk_light: 101 | radius: [1,1] # [0.5, 1.0] 102 | height: [1.5,1.5] #[1.5, 2] 103 | intensity: 104 | "on": [10000, 10000] #[6000, 9000] 105 | "off": [200, 400] 106 | 107 | Rect_light: 108 | width: [100, 100] 109 | height: [100, 100] 110 | intensity: 111 | "on": [50000, 50000] 112 | "off": [2000, 2000] 113 | 114 | specular: 115 | reflection_roughness_constant: [0.05, 0.2] # < 0.4 116 | metallic_constant: [0.8, 0.99] # > 0.9 117 | reflection_color: [0.0, 1.0] 118 | 119 | transparent: 120 | roughness_constant: [0.1, 0.1] # 0.05 121 | cutout_opacity: [0.1, 0.2] # [0.6, 0.7] # [0.2, 0.3] # < 0.4 122 | thin_walled: false #true 123 | glass_ior: [1.4, 1.6] # ~3, default: 1.491 124 | frosting_roughness: [0.2, 0.3] # < 0.1, grayscale only 125 | 126 | glass: 127 | base_alpha: [0.0, 1.0] 128 | ior: [1.4, 1.6] 129 | metallic_factor: [0.0, 0.35] 130 | roughness_factor: [0.0, 0.1] 131 | 132 | scope_name: /MyScope 133 | writer: on # off # BasicWriter 134 | writer_config: 135 | output_dir: output_ir 136 | start_sequence_id: -1 # -1 means continue from the existing frames, otherwise start with specified frame id 137 | rgb: true 138 | disparity: true 139 | normals: true # TODO 140 | # disparity: true 141 | # bounding_box_2d_tight: false 142 | semantic_segmentation: true 143 | distance_to_image_plane: true 144 | pointcloud: false 145 | # bounding_box_3d: false 146 | # occlusion: false 147 | clear_previous_semantics: true 148 | 149 | hydra: 150 | run: 151 | dir: _outputs/${hydra.job.name} 152 | job: 153 | chdir: true 154 | 155 | -------------------------------------------------------------------------------- /data/dataset.py: -------------------------------------------------------------------------------- 1 | from torchvision.transforms import RandomResizedCrop, InterpolationMode 2 | import torchvision.transforms.functional as TF 3 | import torch 4 | import functools 5 | 6 | class WarpDataset(torch.utils.data.Dataset): 7 | def __init__(self, image_size, augment): 8 | self.augment = augment 9 | self.rgb_list = [] 10 | self.depth_list = [] 11 | self.lr_list = [] 12 | self.mask_list = [] 13 | 14 | if self.augment is None: 15 | self.augment = dict() 16 | if type(image_size) == int: 17 | self.image_size = (image_size, image_size) # H x W 18 | elif type(image_size) == tuple: 19 | self.image_size = image_size 20 | else: 21 | raise ValueError("image_size must be int or tuple") 22 | return 23 | 24 | def data_aug(self, rgb, depth, mask, img1=None, img2=None, raw_depth=None): 25 | # random crop and resize. 
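# Note: safe_apply (next line) runs a transform only when its argument is not
# None, so the optional stereo views img1/img2 share one code path with the
# always-present rgb/depth/mask.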
26 | safe_apply = lambda func, x: func(x) if x is not None else None 27 | if 'resizedcrop' in self.augment.keys(): 28 | param = self.augment['resizedcrop'] 29 | i, j, h, w = RandomResizedCrop.get_params(rgb, scale=param['scale'], ratio=param['ratio']) 30 | resized_crop = lambda i, j, h, w, size, interp, x: TF.resized_crop(x, i, j, h, w, size=size, interpolation=interp) 31 | resized_crop_fn = functools.partial(resized_crop, i,j,h,w,self.image_size, InterpolationMode.NEAREST) 32 | rgb, mask, depth, img1, img2 = map(lambda x: safe_apply(resized_crop_fn, x), [rgb, mask, depth, img1, img2]) 33 | 34 | """ rgb = TF.resized_crop(rgb, i, j, h, w, size=self.image_size, interpolation=InterpolationMode.NEAREST) 35 | mask = TF.resized_crop(mask, i, j, h, w, size=self.image_size, interpolation=InterpolationMode.NEAREST) 36 | depth = TF.resized_crop(depth, i, j, h, w, size=self.image_size, interpolation=InterpolationMode.NEAREST) 37 | if img1 is not None: 38 | img1 = TF.resized_crop(img1, i, j, h, w, size=self.image_size, interpolation=InterpolationMode.NEAREST) 39 | img2 = TF.resized_crop(img2, i, j, h, w, size=self.image_size, interpolation=InterpolationMode.NEAREST) """ 40 | else: # only resize when eval and test 41 | resize = lambda size, interp, x: TF.resize(x, size=size, interpolation=interp) 42 | resize_fn = functools.partial(resize, self.image_size, InterpolationMode.NEAREST) 43 | rgb, mask, depth, img1, img2 = map(lambda x: safe_apply(resize_fn, x), [rgb, mask, depth, img1, img2]) 44 | 45 | # rgb = TF.resize(rgb, size=self.image_size, interpolation=InterpolationMode.NEAREST) 46 | # mask = TF.resize(mask, size=self.image_size, interpolation=InterpolationMode.NEAREST) 47 | # depth = TF.resize(depth, size=self.image_size, interpolation=InterpolationMode.NEAREST) 48 | # if img1 is not None: 49 | # img1 = TF.resize(img1, size=self.image_size, interpolation=InterpolationMode.NEAREST) 50 | # img2 = TF.resize(img2, size=self.image_size, interpolation=InterpolationMode.NEAREST) 51 | 52 | # Random hflip 53 | if 'hflip' in self.augment.keys(): 54 | param = self.augment['hflip'] 55 | if torch.rand(1) < 0.5: #param['prob']: 56 | rgb, mask, depth, img1, img2 = map(lambda x: safe_apply(TF.hflip, x), [rgb, mask, depth, img1, img2]) 57 | """ rgb = TF.hflip(rgb) 58 | mask = TF.hflip(mask) 59 | depth = TF.hflip(depth) 60 | if img1 is not None: 61 | img1 = TF.hflip(img1) 62 | img2 = TF.hflip(img2) """ 63 | 64 | # TODO add color augmentation such as changing the lighting 65 | 66 | if img1 is None: 67 | return rgb, depth, mask 68 | else: 69 | return rgb, depth, mask, img1, img2 70 | 71 | 72 | def normalize_depth(self, depth, mask, low_p=0.00, high_p=1.00): 73 | """ low_p, high_p: low and high percentile to normalize the depth""" 74 | mask = mask.bool() 75 | masked_depth = depth[mask] 76 | low, high = torch.quantile(masked_depth, torch.tensor((low_p, high_p))) 77 | 78 | depth = (depth - low) / (high - low) 79 | depth = (depth - 0.5) * 2 # [0,1] -> [-1, 1] 80 | return depth 81 | 82 | def normalize_rgb(self, rgb): 83 | return (rgb / 255 - 0.5) * 2 # [0,1] -> [-1, 1] 84 | 85 | def __mul__(self, v): 86 | self.rgb_list = v * self.rgb_list 87 | self.depth_list = v * self.depth_list 88 | self.lr_list = v * self.lr_list 89 | self.mask_list = v * self.mask_list 90 | return self 91 | 92 | def __len__(self): 93 | return len(self.rgb_list) 94 | -------------------------------------------------------------------------------- /core/praser.py: -------------------------------------------------------------------------------- 1 | 
import os 2 | from collections import OrderedDict 3 | import json 4 | from pathlib import Path 5 | from datetime import datetime 6 | from functools import partial 7 | import importlib 8 | from types import FunctionType 9 | import shutil 10 | def init_obj(opt, logger, *args, default_file_name='default file', given_module=None, init_type='Network', **modify_kwargs): 11 | """ 12 | finds a function handle with the name given as 'name' in config, 13 | and returns the instance initialized with corresponding args. 14 | """ 15 | if opt is None or len(opt)<1: 16 | logger.info('Option is None when initialize {}'.format(init_type)) 17 | return None 18 | 19 | ''' default format is dict with name key ''' 20 | if isinstance(opt, str): 21 | opt = {'name': opt} 22 | logger.warning('Config is a str, converts to a dict {}'.format(opt)) 23 | 24 | name = opt['name'] 25 | ''' name can be list, indicates the file and class name of function ''' 26 | if isinstance(name, list): 27 | file_name, class_name = name[0], name[1] 28 | else: 29 | file_name, class_name = default_file_name, name 30 | try: 31 | if given_module is not None: 32 | module = given_module 33 | else: 34 | module = importlib.import_module(file_name) 35 | 36 | attr = getattr(module, class_name) 37 | kwargs = opt.get('args', {}) 38 | kwargs.update(modify_kwargs) 39 | ''' import class or function with args ''' 40 | if isinstance(attr, type): 41 | ret = attr(*args, **kwargs) 42 | ret.__name__ = ret.__class__.__name__ 43 | elif isinstance(attr, FunctionType): 44 | ret = partial(attr, *args, **kwargs) 45 | ret.__name__ = attr.__name__ 46 | # ret = attr 47 | logger.info('{} [{:s}() form {:s}] is created.'.format(init_type, class_name, file_name)) 48 | except: 49 | raise NotImplementedError('{} [{:s}() form {:s}] not recognized.'.format(init_type, class_name, file_name)) 50 | return ret 51 | 52 | 53 | def mkdirs(paths): 54 | if isinstance(paths, str): 55 | os.makedirs(paths, exist_ok=True) 56 | else: 57 | for path in paths: 58 | os.makedirs(path, exist_ok=True) 59 | 60 | def get_timestamp(): 61 | return datetime.now().strftime('%y%m%d_%H%M%S') 62 | 63 | 64 | def write_json(content, fname): 65 | fname = Path(fname) 66 | with fname.open('wt') as handle: 67 | json.dump(content, handle, indent=4, sort_keys=False) 68 | 69 | class NoneDict(dict): 70 | def __missing__(self, key): 71 | return None 72 | 73 | def dict_to_nonedict(opt): 74 | """ convert to NoneDict, which return None for missing key. 
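Example (illustrative):
    opt = dict_to_nonedict({'train': {'lr': 1e-4}})
    opt['train']['lr']       # -> 0.0001
    opt['train']['missing']  # -> None instead of raising KeyError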
""" 75 | if isinstance(opt, dict): 76 | new_opt = dict() 77 | for key, sub_opt in opt.items(): 78 | new_opt[key] = dict_to_nonedict(sub_opt) 79 | return NoneDict(**new_opt) 80 | elif isinstance(opt, list): 81 | return [dict_to_nonedict(sub_opt) for sub_opt in opt] 82 | else: 83 | return opt 84 | 85 | def dict2str(opt, indent_l=1): 86 | """ dict to string for logger """ 87 | msg = '' 88 | for k, v in opt.items(): 89 | if isinstance(v, dict): 90 | msg += ' ' * (indent_l * 2) + k + ':[\n' 91 | msg += dict2str(v, indent_l + 1) 92 | msg += ' ' * (indent_l * 2) + ']\n' 93 | else: 94 | msg += ' ' * (indent_l * 2) + k + ': ' + str(v) + '\n' 95 | return msg 96 | 97 | def parse(args): 98 | json_str = '' 99 | with open(args.config, 'r') as f: 100 | for line in f: 101 | line = line.split('//')[0] + '\n' 102 | json_str += line 103 | opt = json.loads(json_str, object_pairs_hook=OrderedDict) 104 | 105 | ''' replace the config context using args ''' 106 | opt['phase'] = args.phase 107 | if args.gpu_ids is not None: 108 | opt['gpu_ids'] = [int(id) for id in args.gpu_ids.split(',')] 109 | if args.batch is not None: 110 | opt['datasets'][opt['phase']]['dataloader']['args']['batch_size'] = args.batch 111 | 112 | ''' set cuda environment ''' 113 | if len(opt['gpu_ids']) > 1: 114 | opt['distributed'] = True 115 | else: 116 | opt['distributed'] = False 117 | 118 | ''' update name ''' 119 | if args.debug: 120 | opt['name'] = 'debug_{}'.format(opt['name']) 121 | elif opt['finetune_norm']: 122 | opt['name'] = 'finetune_{}'.format(opt['name']) 123 | else: 124 | opt['name'] = '{}_{}'.format(opt['phase'], opt['name']) 125 | 126 | ''' set log directory ''' 127 | experiments_root = os.path.join(opt['path']['base_dir'], '{}_{}'.format(opt['name'], get_timestamp())) 128 | mkdirs(experiments_root) 129 | 130 | ''' save json ''' 131 | write_json(opt, '{}/config.json'.format(experiments_root)) 132 | 133 | ''' change folder relative hierarchy ''' 134 | opt['path']['experiments_root'] = experiments_root 135 | for key, path in opt['path'].items(): 136 | if 'resume' not in key and 'base' not in key and 'root' not in key: 137 | opt['path'][key] = os.path.join(experiments_root, path) 138 | mkdirs(opt['path'][key]) 139 | 140 | ''' debug mode ''' 141 | if 'debug' in opt['name']: 142 | opt['train'].update(opt['debug']) 143 | 144 | ''' code backup ''' 145 | for name in os.listdir('.'): 146 | if name in ['config', 'models', 'core', 'slurm', 'data']: 147 | shutil.copytree(name, os.path.join(opt['path']['code'], name), ignore=shutil.ignore_patterns("*.pyc", "__pycache__")) 148 | if '.py' in name or '.sh' in name: 149 | shutil.copy(name, opt['path']['code']) 150 | return dict_to_nonedict(opt) 151 | 152 | 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /core/resample.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | import numpy as np 4 | import torch as th 5 | import torch.distributed as dist 6 | 7 | 8 | def create_named_schedule_sampler(name, T, *args): 9 | """ 10 | Create a ScheduleSampler from a library of pre-defined samplers. 11 | 12 | :param name: the name of the sampler. 13 | :param diffusion: the diffusion object to sample for. 
14 | """ 15 | if name == "uniform": 16 | return UniformSampler(T) 17 | elif name == "snr": 18 | return SNRSampler(T, *args) 19 | elif name == "loss-second-moment": 20 | return LossSecondMomentResampler(T) 21 | else: 22 | raise NotImplementedError(f"unknown schedule sampler: {name}") 23 | 24 | 25 | class ScheduleSampler(ABC): 26 | 27 | """ 28 | A distribution over timesteps in the diffusion process, intended to reduce 29 | variance of the objective. 30 | 31 | By default, samplers perform unbiased importance sampling, in which the 32 | objective's mean is unchanged. 33 | However, subclasses may override sample() to change how the resampled 34 | terms are reweighted, allowing for actual changes in the objective. 35 | """ 36 | 37 | @abstractmethod 38 | def weights(self): 39 | """ 40 | Get a numpy array of weights, one per diffusion step. 41 | 42 | The weights needn't be normalized, but must be positive. 43 | """ 44 | 45 | def sample(self, batch_size, device): 46 | """ 47 | Importance-sample timesteps for a batch. 48 | 49 | :param batch_size: the number of timesteps. 50 | :param device: the torch device to place the samples on. 51 | :return: a tuple (timesteps, weights): 52 | - timesteps: a tensor of timestep indices. 53 | - weights: a tensor of weights to scale the resulting losses. 54 | """ 55 | w = self.weights() 56 | p = w / np.sum(w) 57 | indices_np = np.random.choice(len(p), size=(batch_size,), p=p) 58 | indices = th.from_numpy(indices_np).long().to(device) 59 | weights_np = 1 / (len(p) * p[indices_np]) 60 | weights = th.from_numpy(weights_np).float().to(device) 61 | return indices, weights 62 | 63 | 64 | class UniformSampler(ScheduleSampler): 65 | def __init__(self, T): 66 | self.T = T 67 | self._weights = np.ones([T]) 68 | 69 | def weights(self): 70 | return self._weights 71 | 72 | class SNRSampler(ScheduleSampler): 73 | def __init__(self, T, snr): # accept T so the SNRSampler(T, *args) call in the factory above matches 74 | self._snr = snr 75 | 76 | def weights(self): 77 | return self._snr 78 | 79 | class LossAwareSampler(ScheduleSampler): 80 | def update_with_local_losses(self, local_ts, local_losses): 81 | """ 82 | Update the reweighting using losses from a model. 83 | 84 | Call this method from each rank with a batch of timesteps and the 85 | corresponding losses for each of those timesteps. 86 | This method will perform synchronization to make sure all of the ranks 87 | maintain the exact same reweighting. 88 | 89 | :param local_ts: an integer Tensor of timesteps. 90 | :param local_losses: a 1D Tensor of losses. 91 | """ 92 | batch_sizes = [ 93 | th.tensor([0], dtype=th.int32, device=local_ts.device) 94 | for _ in range(dist.get_world_size()) 95 | ] 96 | dist.all_gather( 97 | batch_sizes, 98 | th.tensor([len(local_ts)], dtype=th.int32, device=local_ts.device), 99 | ) 100 | 101 | # Pad all_gather batches to be the maximum batch size. 102 | batch_sizes = [x.item() for x in batch_sizes] 103 | max_bs = max(batch_sizes) 104 | 105 | timestep_batches = [th.zeros(max_bs).to(local_ts) for _ in batch_sizes] 106 | loss_batches = [th.zeros(max_bs).to(local_losses) for _ in batch_sizes] 107 | dist.all_gather(timestep_batches, local_ts) 108 | dist.all_gather(loss_batches, local_losses) 109 | timesteps = [ 110 | x.item() for y, bs in zip(timestep_batches, batch_sizes) for x in y[:bs] 111 | ] 112 | losses = [x.item() for y, bs in zip(loss_batches, batch_sizes) for x in y[:bs]] 113 | self.update_with_all_losses(timesteps, losses) 114 | 115 | @abstractmethod 116 | def update_with_all_losses(self, ts, losses): 117 | """ 118 | Update the reweighting using losses from a model. 
119 | 120 | Sub-classes should override this method to update the reweighting 121 | using losses from the model. 122 | 123 | This method directly updates the reweighting without synchronizing 124 | between workers. It is called by update_with_local_losses from all 125 | ranks with identical arguments. Thus, it should have deterministic 126 | behavior to maintain state across workers. 127 | 128 | :param ts: a list of int timesteps. 129 | :param losses: a list of float losses, one per timestep. 130 | """ 131 | 132 | 133 | class LossSecondMomentResampler(LossAwareSampler): 134 | def __init__(self, T, history_per_term=10, uniform_prob=0.001): 135 | self.T = T 136 | self.history_per_term = history_per_term 137 | self.uniform_prob = uniform_prob 138 | self._loss_history = np.zeros( 139 | [T, history_per_term], dtype=np.float64 140 | ) 141 | self._loss_counts = np.zeros([T], dtype=np.int32) 142 | 143 | def weights(self): 144 | if not self._warmed_up(): 145 | return np.ones([self.T], dtype=np.float64) 146 | weights = np.sqrt(np.mean(self._loss_history ** 2, axis=-1)) 147 | weights /= np.sum(weights) 148 | weights *= 1 - self.uniform_prob 149 | weights += self.uniform_prob / len(weights) 150 | return weights 151 | 152 | def update_with_all_losses(self, ts, losses): 153 | for t, loss in zip(ts, losses): 154 | if self._loss_counts[t] == self.history_per_term: 155 | # Shift out the oldest loss term. 156 | self._loss_history[t, :-1] = self._loss_history[t, 1:] 157 | self._loss_history[t, -1] = loss 158 | else: 159 | self._loss_history[t, self._loss_counts[t]] = loss 160 | self._loss_counts[t] += 1 161 | 162 | def _warmed_up(self): 163 | return (self._loss_counts == self.history_per_term).all() 164 | -------------------------------------------------------------------------------- /utils/ransac.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | import numpy as np 3 | from numpy.random import default_rng 4 | rng = default_rng() 5 | import torch 6 | import time 7 | from utils.utils import compute_scale_and_shift 8 | 9 | def square_error_loss(y_true, y_pred): 10 | return (y_true - y_pred) ** 2 11 | 12 | def mean_square_error(y_true, y_pred): 13 | return torch.sum(square_error_loss(y_true, y_pred)) / y_true.shape[0] 14 | 15 | def mean_absolute_error(y_true, y_pred): 16 | # return np.abs(y_true - y_pred).mean() 17 | return torch.abs(y_true - y_pred).mean(1) 18 | 19 | def mean_accuracy_inverse(y_true, y_pred): 20 | thresh = torch.maximum(y_true / y_pred, y_pred / y_true) 21 | return 1 / torch.mean((thresh < 1.25).float()) 22 | 23 | 24 | class ScaleShiftEstimator: 25 | def __init__(self): 26 | self.params = (1, 0) # s,t 27 | 28 | def fit(self, X: np.ndarray, Y: np.ndarray): 29 | """ X & Y: Nx1 """ 30 | start = time.time() 31 | self.params = compute_scale_and_shift(X, Y) 32 | end = time.time() 33 | print(f"ssi: {end - start:.5f}") 34 | return self 35 | 36 | def predict(self, X: np.ndarray): 37 | return X * self.params[0] + self.params[1] 38 | 39 | class RANSAC: 40 | def __init__(self, n=0.1, k=100, t=0.05, d=0.5, model=ScaleShiftEstimator(), loss=square_error_loss, metric=mean_accuracy_inverse): 41 | self.n = n # `n`: (percent) Minimum number of data points to estimate parameters 42 | self.k = k # `k`: Maximum iterations allowed 43 | self.t = t # `t`: Threshold value to determine if points are fit well 44 | self.d = d # `d`: (percent)Number of close data points required to assert model fits well 45 | self.model = model # `model`: class implementing 
`fit` and `predict` 46 | self.loss = loss # `loss`: function of `y_true` and `y_pred` that returns a vector 47 | self.metric = metric # `metric`: function of `y_true` and `y_pred` and returns a float 48 | self.best_fit = None 49 | self.best_error = None 50 | 51 | def fit(self, X, Y, mask): 52 | """ X: source 53 | Y: target 54 | """ 55 | assert X.shape == Y.shape == mask.shape 56 | B, HW = X.shape 57 | 58 | X = X.clone() 59 | Y = Y.clone() 60 | mask = mask.clone() 61 | N = int(self.n * HW) 62 | T = self.t 63 | # T = self.t * torch.abs(Y[mask.bool()]).mean() 64 | D = int(self.d * HW) 65 | 66 | assert D < HW and N < HW, "N, D must be less than HW" 67 | 68 | self.best_num_inlier = torch.zeros((B, 1), device=X.device).to(torch.int32) 69 | self.best_mask_inlier = torch.zeros((B, HW), device=X.device).to(torch.bool) 70 | self.best_error = torch.full((B, 1), torch.inf, device=X.device) 71 | self.best_fit = torch.empty((B, 2), device=X.device) 72 | self.best_fit[:,0] = 1.0 # init s=1, t=0 73 | self.best_fit[:,1] = 0.0 74 | 75 | for _ in range(self.k): 76 | ids = torch.randperm(HW, device=X.device).repeat(B, 1) # torch.arange(HW, device=X.device).repeat(B, 1) # 77 | maybe_inliers = ids[:, :N] 78 | maybe_model = compute_scale_and_shift( 79 | torch.gather(X, 1, maybe_inliers), 80 | torch.gather(Y, 1, maybe_inliers), 81 | torch.gather(mask, 1, maybe_inliers)) 82 | 83 | X_ = X * maybe_model[:, 0:1] + maybe_model[:,1:] 84 | threshold = torch.where(self.loss(Y, X_,) < T, 1, 0).to(torch.bool) & mask.bool() 85 | 86 | better_model = compute_scale_and_shift(X, Y, threshold) 87 | X__ = X * better_model[:, 0:1] + better_model[:, 1:] 88 | this_error = self.metric(Y, X__)[...,None] 89 | this_num_inlier = torch.sum(threshold, 1)[...,None] 90 | select = (this_num_inlier > D) & (this_error < self.best_error) 91 | 92 | self.best_num_inlier = torch.where(select, this_num_inlier, self.best_num_inlier) 93 | self.best_mask_inlier = torch.where(select, threshold, self.best_mask_inlier) 94 | self.best_fit = torch.where(select, better_model, self.best_fit) 95 | self.best_error = torch.where(select, this_error, self.best_error) 96 | return self 97 | 98 | def predict(self, X): 99 | return self.best_fit.predict(X) 100 | 101 | class LinearRegressor: 102 | def __init__(self): 103 | self.params = None 104 | 105 | def fit(self, X: np.ndarray, y: np.ndarray): 106 | r, _ = X.shape 107 | X = np.hstack([np.ones((r, 1)), X]) 108 | self.params = np.linalg.inv(X.T @ X) @ X.T @ y 109 | return self 110 | 111 | def predict(self, X: np.ndarray): 112 | r, _ = X.shape 113 | X = np.hstack([np.ones((r, 1)), X]) 114 | return X @ self.params 115 | 116 | 117 | if __name__ == "__main__": 118 | 119 | regressor = RANSAC(model=LinearRegressor(), loss=square_error_loss, metric=mean_square_error) 120 | 121 | X = np.array([-0.848,-0.800,-0.704,-0.632,-0.488,-0.472,-0.368,-0.336,-0.280,-0.200,-0.00800,-0.0840,0.0240,0.100,0.124,0.148,0.232,0.236,0.324,0.356,0.368,0.440,0.512,0.548,0.660,0.640,0.712,0.752,0.776,0.880,0.920,0.944,-0.108,-0.168,-0.720,-0.784,-0.224,-0.604,-0.740,-0.0440,0.388,-0.0200,0.752,0.416,-0.0800,-0.348,0.988,0.776,0.680,0.880,-0.816,-0.424,-0.932,0.272,-0.556,-0.568,-0.600,-0.716,-0.796,-0.880,-0.972,-0.916,0.816,0.892,0.956,0.980,0.988,0.992,0.00400]).reshape(-1,1) 122 | y = 
np.array([-0.917,-0.833,-0.801,-0.665,-0.605,-0.545,-0.509,-0.433,-0.397,-0.281,-0.205,-0.169,-0.0531,-0.0651,0.0349,0.0829,0.0589,0.175,0.179,0.191,0.259,0.287,0.359,0.395,0.483,0.539,0.543,0.603,0.667,0.679,0.751,0.803,-0.265,-0.341,0.111,-0.113,0.547,0.791,0.551,0.347,0.975,0.943,-0.249,-0.769,-0.625,-0.861,-0.749,-0.945,-0.493,0.163,-0.469,0.0669,0.891,0.623,-0.609,-0.677,-0.721,-0.745,-0.885,-0.897,-0.969,-0.949,0.707,0.783,0.859,0.979,0.811,0.891,-0.137]).reshape(-1,1) 123 | 124 | regressor.fit(X, y) 125 | 126 | import matplotlib.pyplot as plt 127 | plt.style.use("seaborn-darkgrid") 128 | fig, ax = plt.subplots(1, 1) 129 | ax.set_box_aspect(1) 130 | 131 | plt.scatter(X, y) 132 | 133 | line = np.linspace(-1, 1, num=100).reshape(-1, 1) 134 | plt.plot(line, regressor.predict(line), c="peru") 135 | # plt.show() 136 | plt.savefig("ransac.png") 137 | plt.close() -------------------------------------------------------------------------------- /datasets/README.md: -------------------------------------------------------------------------------- 1 | link all the datasets here, example folder structures: 2 | 3 | ``` 4 | datasets 5 | ├── clearpose -> /raid/songlin/Data/clearpose 6 | │ ├── clearpose_downsample_100 7 | │ │ ├── downsample.py 8 | │ │ ├── model 9 | │ │ ├── set1 10 | │ │ ├── set2 11 | │ │ ├── set3 12 | │ │ ├── set4 13 | │ │ ├── set5 14 | │ │ ├── set6 15 | │ │ ├── set7 16 | │ │ ├── set8 17 | │ │ └── set9 18 | │ ├── metadata 19 | │ │ ├── set1 20 | │ │ ├── set2 21 | │ │ ├── set3 22 | │ │ ├── set4 23 | │ │ ├── set5 24 | │ │ ├── set6 25 | │ │ ├── set7 26 | │ │ ├── set8 27 | │ │ └── set9 28 | │ ├── model 29 | │ │ ├── 003_cracker_box 30 | │ │ ├── 005_tomato_soup_can 31 | │ │ ├── 006_mustard_bottle 32 | │ │ ├── 007_tuna_fish_can 33 | │ │ ├── 009_gelatin_box 34 | │ │ ├── BBQSauce 35 | │ │ ├── beaker_1 36 | │ │ ├── bottle_1 37 | │ │ ├── bottle_2 38 | │ │ ├── bottle_3 39 | │ │ ├── bottle_4 40 | │ │ ├── bottle_5 41 | │ │ ├── bowl_1 42 | │ │ ├── bowl_2 43 | │ │ ├── bowl_3 44 | │ │ ├── bowl_4 45 | │ │ ├── bowl_5 46 | │ │ ├── bowl_6 47 | │ │ ├── container_1 48 | │ │ ├── container_2 49 | │ │ ├── container_3 50 | │ │ ├── container_4 51 | │ │ ├── container_5 52 | │ │ ├── create_keypoints.py 53 | │ │ ├── dropper_1 54 | │ │ ├── dropper_2 55 | │ │ ├── flask_1 56 | │ │ ├── fork_1 57 | │ │ ├── funnel_1 58 | │ │ ├── graduated_cylinder_1 59 | │ │ ├── graduated_cylinder_2 60 | │ │ ├── knife_1 61 | │ │ ├── knife_2 62 | │ │ ├── Mayo 63 | │ │ ├── mug_1 64 | │ │ ├── mug_2 65 | │ │ ├── OrangeJuice 66 | │ │ ├── pan_1 67 | │ │ ├── pan_2 68 | │ │ ├── pan_3 69 | │ │ ├── pitcher_1 70 | │ │ ├── plate_1 71 | │ │ ├── plate_2 72 | │ │ ├── reagent_bottle_1 73 | │ │ ├── reagent_bottle_2 74 | │ │ ├── round_table 75 | │ │ ├── spoon_1 76 | │ │ ├── spoon_2 77 | │ │ ├── stick_1 78 | │ │ ├── syringe_1 79 | │ │ ├── trans_models.blend 80 | │ │ ├── trans_models_keypoint.blend 81 | │ │ ├── trans_models_keypoint.blend1 82 | │ │ ├── trans_models_keypoint (copy).blend 83 | │ │ ├── trans_models_kp.blend 84 | │ │ ├── water_cup_1 85 | │ │ ├── water_cup_10 86 | │ │ ├── water_cup_11 87 | │ │ ├── water_cup_12 88 | │ │ ├── water_cup_13 89 | │ │ ├── water_cup_14 90 | │ │ ├── water_cup_2 91 | │ │ ├── water_cup_3 92 | │ │ ├── water_cup_4 93 | │ │ ├── water_cup_5 94 | │ │ ├── water_cup_6 95 | │ │ ├── water_cup_7 96 | │ │ ├── water_cup_8 97 | │ │ ├── water_cup_9 98 | │ │ ├── wine_cup_1 99 | │ │ ├── wine_cup_2 100 | │ │ ├── wine_cup_3 101 | │ │ ├── wine_cup_4 102 | │ │ ├── wine_cup_5 103 | │ │ ├── wine_cup_6 104 | │ │ ├── wine_cup_7 105 | │ │ ├── 
wine_cup_8 106 | │ │ └── wine_cup_9 107 | │ ├── set1 108 | │ │ ├── scene1 109 | │ │ ├── scene2 110 | │ │ ├── scene3 111 | │ │ ├── scene4 112 | │ │ └── scene5 113 | │ ├── set2 114 | │ │ ├── scene1 115 | │ │ ├── scene3 116 | │ │ ├── scene4 117 | │ │ ├── scene5 118 | │ │ └── scene6 119 | │ ├── set3 120 | │ │ ├── scene1 121 | │ │ ├── scene11 122 | │ │ ├── scene3 123 | │ │ ├── scene4 124 | │ │ └── scene8 125 | │ ├── set4 126 | │ │ ├── scene1 127 | │ │ ├── scene2 128 | │ │ ├── scene3 129 | │ │ ├── scene4 130 | │ │ ├── scene5 131 | │ │ └── scene6 132 | │ ├── set5 133 | │ │ ├── scene1 134 | │ │ ├── scene2 135 | │ │ ├── scene3 136 | │ │ ├── scene4 137 | │ │ ├── scene5 138 | │ │ └── scene6 139 | │ ├── set6 140 | │ │ ├── scene1 141 | │ │ ├── scene2 142 | │ │ ├── scene3 143 | │ │ ├── scene4 144 | │ │ ├── scene5 145 | │ │ └── scene6 146 | │ ├── set7 147 | │ │ ├── scene1 148 | │ │ ├── scene2 149 | │ │ ├── scene3 150 | │ │ ├── scene4 151 | │ │ ├── scene5 152 | │ │ └── scene6 153 | │ ├── set8 154 | │ │ ├── scene1 155 | │ │ ├── scene2 156 | │ │ ├── scene3 157 | │ │ ├── scene4 158 | │ │ ├── scene5 159 | │ │ └── scene6 160 | │ └── set9 161 | │ ├── scene10 162 | │ ├── scene11 163 | │ ├── scene12 164 | │ ├── scene7 165 | │ ├── scene8 166 | │ └── scene9 167 | ├── DREDS 168 | │ ├── test -> /raid/songlin/Data/DREDS_ECCV2022/DREDS-CatKnown/test 169 | │ │ └── shapenet_generate_1216_val_novel 170 | │ ├── test_std_catknown -> /raid/songlin/Data/DREDS_ECCV2022/STD-CatKnown 171 | │ │ ├── test_0 172 | │ │ ├── test_14-1 173 | │ │ ├── test_18-1 174 | │ │ ├── test_19 175 | │ │ ├── test_20-3 176 | │ │ ├── test_3-2 177 | │ │ ├── test_4-2 178 | │ │ ├── test_5-2 179 | │ │ ├── test_6-1 180 | │ │ ├── test_7-1 181 | │ │ ├── test_8 182 | │ │ ├── test_9-2 183 | │ │ ├── train_0-5 184 | │ │ ├── train_10-1 185 | │ │ ├── train_12 186 | │ │ ├── train_1-4 187 | │ │ ├── train_14-1 188 | │ │ ├── train_16-2 189 | │ │ ├── train_17-1 190 | │ │ ├── train_19-1 191 | │ │ ├── train_3 192 | │ │ ├── train_4-1 193 | │ │ ├── train_7-1 194 | │ │ ├── train_8 195 | │ │ └── train_9-3 196 | │ ├── test_std_catnovel -> /raid/songlin/Data/DREDS_ECCV2022/STD-CatNovel 197 | │ │ └── real_data_novel 198 | │ ├── train -> /raid/songlin/Data/DREDS_ECCV2022/DREDS-CatKnown/train 199 | │ │ ├── part0 200 | │ │ ├── part1 201 | │ │ ├── part2 202 | │ │ ├── part3 203 | │ │ └── part4 204 | │ └── val -> /raid/songlin/Data/DREDS_ECCV2022/DREDS-CatKnown/val 205 | │ └── shapenet_generate_1216 206 | ├── HISS 207 | │ ├── train -> /raid/songlin/Data/hssd-isaac-sim-100k 208 | │ │ ├── 102344049 209 | │ │ ├── 102344280 210 | │ │ ├── 103997586_171030666 211 | │ │ ├── 107734119_175999932 212 | │ │ └── bad_his.txt 213 | │ └── val -> /raid/songlin/Data/hssd-isaac-sim-300hq 214 | │ ├── 102344049 215 | │ ├── 102344280 216 | │ ├── 103997586_171030666 217 | │ ├── 107734119_175999932 218 | │ ├── bad_his.txt 219 | │ └── simulation2 220 | ├── README.md 221 | ├── Real 222 | │ └── xiaomeng 223 | │ ├── 0000_depth.png 224 | │ ├── 0000_ir_l.png 225 | │ ├── 0000_ir_r.png 226 | │ ├── 0000_raw_disparity.png 227 | │ ├── 0000_rgb.png 228 | │ └── intrinsics.txt 229 | └── sceneflow -> /raid/songlin/Data/sceneflow 230 | ├── bad_sceneflow_test.txt 231 | ├── bad_sceneflow_train.txt 232 | ├── Driving 233 | │ ├── disparity 234 | │ ├── frames_cleanpass 235 | │ ├── frames_finalpass 236 | │ ├── raw_cleanpass 237 | │ └── raw_finalpass 238 | ├── FlyingThings3D 239 | │ ├── disparity 240 | │ ├── frames_cleanpass 241 | │ ├── frames_finalpass 242 | │ ├── raw_cleanpass 243 | │ └── raw_finalpass 244 | └── Monkaa 245 | ├── 
disparity 246 | ├── frames_cleanpass 247 | ├── frames_finalpass 248 | ├── raw_cleanpass 249 | └── raw_finalpass 250 | 251 | 227 directories, 18 files 252 | 253 | ``` 254 | -------------------------------------------------------------------------------- /distributed_evaluate.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import math 3 | import argparse 4 | import torch 5 | import logging 6 | from tqdm import tqdm 7 | 8 | from core.custom_pipelines import GuidedLatentDiffusionPipeline 9 | from accelerate import Accelerator, PartialState 10 | from core.guidance import FlowGuidance 11 | import numpy as np 12 | from utils.utils import seed_everything 13 | from config import TrainingConfig, create_sampler 14 | from diffusers import UNet2DModel, DDIMScheduler 15 | from utils.utils import InputPadder, metrics_to_dict, pretty_json 16 | from accelerate.logging import get_logger 17 | from utils.camera import plot_error_map 18 | from evaluate import eval_batch 19 | from data.stereo_datasets import * 20 | from data.mono_datasets import * 21 | 22 | import hydra 23 | from config import Config, TrainingConfig, create_sampler, setup_hydra_configurations 24 | 25 | logger = get_logger(__name__, log_level="INFO") 26 | 27 | @hydra.main(version_base=None, config_path="conf", config_name="config.yaml") 28 | def run_distributed_eval(base_cfg: Config): 29 | if base_cfg.seed != -1: 30 | seed_everything(base_cfg.seed) # for reproducing 31 | 32 | accelerator = Accelerator() # hack: enable logging 33 | 34 | config = base_cfg.task 35 | assert len(config.eval_dataset) == 1, "only support single dataset for evaluation" 36 | 37 | inputPadder = InputPadder(config.image_size, divis_by=8) 38 | # config.camera # hack init default camera 39 | 40 | patrained_path = f"{config.resume_pretrained}" 41 | if os.path.exists(patrained_path): 42 | logger.info(f"load weights from {patrained_path}") 43 | """ pipeline = GuidedLatentDiffusionPipeline.from_pretrained(patrained_path).to("cuda") 44 | # model = UNet2DConditionModel.from_pretrained(patrained_path) 45 | 46 | from diffusers import DDIMScheduler 47 | ddim = DDIMScheduler.from_config(dict( 48 | beta_schedule = config.beta_schedule, # "scaled_linear", 49 | beta_start = config.beta_start, # 0.00085, 50 | beta_end = config.beta_end, # 0.012, 51 | clip_sample = config.clip_sample, # False, 52 | num_train_timesteps = config.num_train_timesteps, # 1000, 53 | prediction_type = config.prediction_type, # #"v_prediction", 54 | set_alpha_to_one = False, 55 | skip_prk_steps = True, 56 | steps_offset = 1, 57 | trained_betas = None 58 | )) 59 | pipeline.scheduler = ddim """ 60 | 61 | from core.custom_pipelines import GuidedDiffusionPipeline, GuidedLatentDiffusionPipeline 62 | clazz_pipeline = GuidedLatentDiffusionPipeline if config.ldm else GuidedDiffusionPipeline 63 | pipeline = clazz_pipeline.from_pretrained(patrained_path).to("cuda") 64 | pipeline.guidance.flow_guidance_mode=config.flow_guidance_mode 65 | 66 | pipeline.scheduler = create_sampler(config, train=False) 67 | else: 68 | raise ValueError(f"patrained path not exists: {patrained_path}") 69 | 70 | if config.eval_output: 71 | eval_output_dir = f"{config.resume_pretrained}/{config.eval_output}" 72 | else: 73 | eval_output_dir = f"{config.resume_pretrained}/dist.{config.eval_dataset[0]}.g.{config.guide_source}.b{config.eval_num_batch}.{config.eval_split}" 74 | 75 | if not os.path.exists(eval_output_dir): 76 | os.makedirs(eval_output_dir, exist_ok=True) 77 | 78 | 
logger.logger.addHandler(logging.FileHandler(f"{eval_output_dir}/eval.log")) 79 | logger.logger.addHandler(logging.StreamHandler(sys.stdout)) 80 | logger.info(f"eval output dir: {eval_output_dir}") 81 | 82 | from data.data_loader import create_dataset 83 | val_dataset = create_dataset(config, config.eval_dataset[0], split = config.eval_split) 84 | # print(f"eval_batch_size={config.eval_batch_size}"); exit(0) 85 | val_dataloader = torch.utils.data.DataLoader(val_dataset, 86 | batch_size=config.eval_batch_size, 87 | shuffle=True, 88 | pin_memory=False, 89 | drop_last=False) 90 | 91 | """ if type(model.sample_size) == list: 92 | model.sample_size[0] = inputPadder.padded_size[0] 93 | model.sample_size[1] = inputPadder.padded_size[1] """ 94 | 95 | # distributed evaluation 96 | val_dataloader = accelerator.prepare(val_dataloader) 97 | 98 | pbar = tqdm(total=len(val_dataloader), desc="Eval", disable=not accelerator.is_local_main_process, position=0) 99 | disable_bar = not accelerator.is_local_main_process 100 | distributed_state = PartialState() 101 | 102 | w = config.flow_guidance_weights[0] 103 | if accelerator.is_local_main_process: 104 | logger.info(f"guided by {config.guide_source}") 105 | 106 | disp_metrics = [] 107 | depth_metrics = [] 108 | total = 0 109 | for i, batch in enumerate(val_dataloader): 110 | if config.eval_num_batch > 0 and i >= config.eval_num_batch: 111 | break 112 | 113 | normalized_rgbs = batch["normalized_rgb"] 114 | gt_images = batch["normalized_disp"] 115 | raw_disps = batch["raw_disp"] 116 | left_images = batch["left_image"] if "left_image" in batch else None 117 | right_images = batch["right_image"] if "right_image" in batch else None 118 | depth_images = batch["depth"] if "depth" in batch else None 119 | gt_masks = batch["mask"] 120 | fxb = batch["fxb"] 121 | sim_disps = batch["sim_disp"] if "sim_disp" in batch else None 122 | 123 | B = normalized_rgbs.shape[0] 124 | # assert not torch.any(gt_images[gt_masks.to(torch.bool)] == 0.0), "dataset bug" 125 | if config.guide_source is None: 126 | pass 127 | 128 | elif config.guide_source == "raft-stereo": 129 | pass 130 | 131 | elif config.guide_source == "stereo-match": 132 | pass 133 | 134 | elif config.guide_source == "raw-depth": 135 | guidance_image = batch["raw_depth"] # raw 136 | valid = guidance_image > 0 137 | 138 | elif config.guide_source == "gt": 139 | guidance_image = batch["depth"] # gt 140 | valid = guidance_image > 0 141 | else: 142 | raise ValueError(f"Unknown guidance mode: {config.guide_source}") 143 | 144 | if config.guide_source is not None: 145 | pipeline.guidance.prepare(guidance_image, valid, "depth") # disp 146 | pipeline.guidance.flow_guidance_weight = w 147 | 148 | pred_disps, metrics_, uncertainties, error, intermediates = eval_batch(config, pipeline, disable_bar, fxb, normalized_rgbs, 149 | raw_disps, gt_masks, left_images, right_images, sim_disps) 150 | metrics = metrics_to_dict(*metrics_) 151 | logger.info(f"metrics(w={w}):{pretty_json(metrics)}") 152 | 153 | disp_err = torch.from_numpy(metrics_[0]).to(distributed_state.device) # to be gathered 154 | depth_err = torch.from_numpy(metrics_[1]).to(distributed_state.device) 155 | 156 | if config.plot_error_map: 157 | fname = lambda name: f"{eval_output_dir}/idx{i}_w{w}_pid{distributed_state.process_index}_{name}" 158 | error_map = plot_error_map(error) 159 | error_map.save(fname("error.png")) 160 | 161 | # gather all batch results 162 | gathered_disp_err = accelerator.gather_for_metrics(disp_err) 163 | gathered_depth_err = 
accelerator.gather_for_metrics(depth_err) 164 | 165 | disp_metrics.extend(gathered_disp_err) 166 | depth_metrics.extend(gathered_depth_err) 167 | total += gathered_disp_err.shape[0] 168 | 169 | pbar.update(1) 170 | 171 | # whole val set results 172 | gathered_metrics = metrics_to_dict(torch.vstack(disp_metrics).cpu().numpy(), torch.vstack(depth_metrics).cpu().numpy()) 173 | logger.info(f"final metrics:{pretty_json(gathered_metrics)}") 174 | logger.info(f"total evaluated {total} samples, please check if correct") 175 | 176 | if __name__ == "__main__": 177 | setup_hydra_configurations() 178 | run_distributed_eval() -------------------------------------------------------------------------------- /isaacsim/utils_func.py: -------------------------------------------------------------------------------- 1 | import os, re, math 2 | import numpy as np 3 | from typing import Union, Type, List, Tuple 4 | from pxr import Gf, Sdf, Usd, UsdGeom 5 | from omni.isaac.core.utils.prims import get_prim_at_path 6 | import transforms3d 7 | import omni 8 | 9 | def find_next_sequence_id(output_dir): 10 | import glob 11 | import os 12 | files = sorted(glob.glob(os.path.join(output_dir, "*.png")), reverse=True) 13 | if len(files) == 0: 14 | return 0 15 | return int(files[0].split("/")[-1].split("_")[0]) + 1 16 | 17 | def get_visibility_attribute( 18 | stage: Usd.Stage, prim_path: str 19 | ) -> Union[Usd.Attribute, None]: 20 | #Return the visibility attribute of a prim 21 | path = Sdf.Path(prim_path) 22 | prim = stage.GetPrimAtPath(path) 23 | if not prim.IsValid(): 24 | return None 25 | visibility_attribute = prim.GetAttribute("visibility") 26 | return visibility_attribute 27 | 28 | def get_all_child_mesh(parent_prim: Usd.Prim) -> Usd.Prim: 29 | # Iterates only active, loaded, defined, non-abstract children 30 | mesh_prims = [] 31 | for model_prim in parent_prim.GetChildren(): 32 | if "model" in model_prim.GetPath().pathString: 33 | for child_prim in model_prim.GetChildren(): 34 | if child_prim.IsA(UsdGeom.Mesh): 35 | mesh_prims.append(child_prim) 36 | return mesh_prims 37 | 38 | def create_materials(self, stage, num, opacity): 39 | MDL = "OmniPBR.mdl" 40 | # MDL = "OmniGlass.mdl" 41 | mtl_name, _ = os.path.splitext(MDL) 42 | MAT_PATH = "/World/Looks" 43 | materials = [] 44 | for _ in range(num): 45 | prim_path = omni.usd.get_stage_next_free_path(stage, f"{MAT_PATH}/{mtl_name}", False) 46 | mat = self.create_omnipbr_material(mtl_url=MDL, mtl_name=mtl_name, mtl_path=prim_path, cutout_opacity=opacity) 47 | materials.append(mat) 48 | return materials 49 | 50 | def parse_quadrant(q): 51 | """ x+-y+-z+-, in isaac sim hssd coordinate system """ 52 | x_, y_, z_ = q.split(',') 53 | if y_[1:] == '+': 54 | theta = [0, np.pi/2] 55 | elif y_[1:] == '-': 56 | theta = [np.pi/2, np.pi] 57 | else: 58 | theta = [0, np.pi] 59 | 60 | if z_[1:] == '+': 61 | phi = [0, np.pi/2] 62 | elif z_[1:] == '-': 63 | phi = [np.pi/2, np.pi] 64 | else: 65 | phi = [0, np.pi] 66 | 67 | return theta, phi 68 | 69 | def grasp_pose_in_robot(target_grasp, graspnet_offset = np.array([0,0,0])): 70 | T_table_grasp = np.eye(4) 71 | T_table_grasp[:3, :3] = transforms3d.quaternions.quat2mat(target_grasp['orientation']) 72 | T_table_grasp[:3, 3] = target_grasp['position'] 73 | 74 | T_world_table = np.eye(4) 75 | # TODO random table rotation around z 76 | T_world_table[:3, 3] = graspnet_offset 77 | 78 | T_grasp_ee = np.array([ 79 | [0, 0, 1, 0], 80 | [0, -1, 0, 0], 81 | [1, 0, 0, 0], 82 | [0, 0, 0, 1] 83 | ]) 84 | 85 | T_robot_world = np.eye(4) # should be always 
be identity due to curobo limitation 86 | T_ee_hand = np.eye(4) 87 | T_ee_hand[:3, 3] = np.array([0, 0, -0.10]) 88 | 89 | """ T_robot_hand: base_link -> panda_hand """ 90 | T_robot_hand = T_robot_world @ T_world_table @ T_table_grasp @ T_grasp_ee @ T_ee_hand 91 | target_pose = { 92 | 'position' : T_robot_hand[:3, 3], 93 | 'orientation' : transforms3d.quaternions.mat2quat(T_robot_hand[:3, :3]) 94 | } 95 | return target_pose 96 | 97 | def compute_obb(bbox_cache: UsdGeom.BBoxCache, prim_path: str) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: 98 | """Computes the Oriented Bounding Box (OBB) of a prim 99 | 100 | .. note:: 101 | 102 | * The OBB does not guarantee the smallest possible bounding box, it rotates and scales the default AABB. 103 | * The rotation matrix incorporates any scale factors applied to the object. 104 | * The `half_extent` values do not include these scaling effects. 105 | 106 | Args: 107 | bbox_cache (UsdGeom.BBoxCache): USD Bounding Box Cache object to use for computation 108 | prim_path (str): Prim path to compute OBB for 109 | 110 | Returns: 111 | Tuple[np.ndarray, np.ndarray, np.ndarray]: A tuple containing the following OBB information: 112 | - The centroid of the OBB as a NumPy array. 113 | - The axes of the OBB as a 2D NumPy array, where each row represents a different axis. 114 | - The half extent of the OBB as a NumPy array. 115 | 116 | Example: 117 | 118 | .. code-block:: python 119 | 120 | >>> import omni.isaac.core.utils.bounds as bounds_utils 121 | >>> 122 | >>> # 1 stage unit length cube centered at (0.0, 0.0, 0.0) 123 | >>> cache = bounds_utils.create_bbox_cache() 124 | >>> centroid, axes, half_extent = bounds_utils.compute_obb(cache, prim_path="/World/Cube") 125 | >>> centroid 126 | [0. 0. 0.] 127 | >>> axes 128 | [[1. 0. 0.] 129 | [0. 1. 0.] 130 | [0. 0. 1.]] 131 | >>> half_extent 132 | [0.5 0.5 0.5] 133 | >>> 134 | >>> # the same cube rotated 45 degrees around the z-axis 135 | >>> cache = bounds_utils.create_bbox_cache() 136 | >>> centroid, axes, half_extent = bounds_utils.compute_obb(cache, prim_path="/World/Cube") 137 | >>> centroid 138 | [0. 0. 0.] 139 | >>> axes 140 | [[ 0.70710678 0.70710678 0. ] 141 | [-0.70710678 0.70710678 0. ] 142 | [ 0. 0. 1. ]] 143 | >>> half_extent 144 | [0.5 0.5 0.5] 145 | """ 146 | # Compute the BBox3d for the prim 147 | prim = get_prim_at_path(prim_path) 148 | bound = bbox_cache.ComputeWorldBound(prim) 149 | 150 | # Compute the translated centroid of the world bound 151 | centroid = bound.ComputeCentroid() 152 | 153 | # Compute the axis vectors of the OBB 154 | # NOTE: The rotation matrix incorporates the scale factors applied to the object 155 | rotation_matrix = bound.GetMatrix().ExtractRotationMatrix() 156 | x_axis = rotation_matrix.GetRow(0) 157 | y_axis = rotation_matrix.GetRow(1) 158 | z_axis = rotation_matrix.GetRow(2) 159 | 160 | # Compute the half-lengths of the OBB along each axis 161 | # NOTE the size/extent values do not include any scaling effects 162 | half_extent = bound.GetRange().GetSize() * 0.5 163 | 164 | return np.array([*centroid]), np.array([[*x_axis], [*y_axis], [*z_axis]]), np.array(half_extent) 165 | 166 | def get_obb_corners(centroid: np.ndarray, axes: np.ndarray, half_extent: np.ndarray) -> np.ndarray: 167 | """Computes the corners of the Oriented Bounding Box (OBB) from the given OBB information 168 | 169 | Args: 170 | centroid (np.ndarray): The centroid of the OBB as a NumPy array. 171 | axes (np.ndarray): The axes of the OBB as a 2D NumPy array, where each row represents a different axis. 
172 | half_extent (np.ndarray): The half extent of the OBB as a NumPy array. 173 | 174 | Returns: 175 | np.ndarray: NumPy array of shape (8, 3) containing each corner location of the OBB 176 | 177 | :math:`c_0 = (x_{min}, y_{min}, z_{min})` 178 | |br| :math:`c_1 = (x_{min}, y_{min}, z_{max})` 179 | |br| :math:`c_2 = (x_{min}, y_{max}, z_{min})` 180 | |br| :math:`c_3 = (x_{min}, y_{max}, z_{max})` 181 | |br| :math:`c_4 = (x_{max}, y_{min}, z_{min})` 182 | |br| :math:`c_5 = (x_{max}, y_{min}, z_{max})` 183 | |br| :math:`c_6 = (x_{max}, y_{max}, z_{min})` 184 | |br| :math:`c_7 = (x_{max}, y_{max}, z_{max})` 185 | 186 | Example: 187 | 188 | .. code-block:: python 189 | 190 | >>> import omni.isaac.core.utils.bounds as bounds_utils 191 | >>> 192 | >>> cache = bounds_utils.create_bbox_cache() 193 | >>> centroid, axes, half_extent = bounds_utils.compute_obb(cache, prim_path="/World/Cube") 194 | >>> bounds_utils.get_obb_corners(centroid, axes, half_extent) 195 | [[-0.5 -0.5 -0.5] 196 | [-0.5 -0.5 0.5] 197 | [-0.5 0.5 -0.5] 198 | [-0.5 0.5 0.5] 199 | [ 0.5 -0.5 -0.5] 200 | [ 0.5 -0.5 0.5] 201 | [ 0.5 0.5 -0.5] 202 | [ 0.5 0.5 0.5]] 203 | """ 204 | corners = [ 205 | centroid - axes[0] * half_extent[0] - axes[1] * half_extent[1] - axes[2] * half_extent[2], 206 | centroid - axes[0] * half_extent[0] - axes[1] * half_extent[1] + axes[2] * half_extent[2], 207 | centroid - axes[0] * half_extent[0] + axes[1] * half_extent[1] - axes[2] * half_extent[2], 208 | centroid - axes[0] * half_extent[0] + axes[1] * half_extent[1] + axes[2] * half_extent[2], 209 | centroid + axes[0] * half_extent[0] - axes[1] * half_extent[1] - axes[2] * half_extent[2], 210 | centroid + axes[0] * half_extent[0] - axes[1] * half_extent[1] + axes[2] * half_extent[2], 211 | centroid + axes[0] * half_extent[0] + axes[1] * half_extent[1] - axes[2] * half_extent[2], 212 | centroid + axes[0] * half_extent[0] + axes[1] * half_extent[1] + axes[2] * half_extent[2], 213 | ] 214 | return np.array(corners) 215 | -------------------------------------------------------------------------------- /scripts/check_stereo.py: -------------------------------------------------------------------------------- 1 | import hydra 2 | from omegaconf import DictConfig, OmegaConf 3 | from hydra.core.config_store import ConfigStore 4 | from config import Config, TrainingConfig, setup_hydra_configurations 5 | from data.data_loader import fetch_dataloader 6 | from utils.utils import seed_everything 7 | from accelerate import Accelerator 8 | from accelerate.logging import get_logger 9 | from tqdm import tqdm 10 | from utils.utils import Normalizer 11 | import torch.nn.functional as F 12 | 13 | import torch 14 | import numpy as np 15 | from PIL import Image 16 | 17 | logger = get_logger(__name__, log_level="INFO") # multi-process logging 18 | 19 | Accelerator() # hack: enable logging 20 | 21 | @hydra.main(version_base=None, config_path="conf", config_name="config.yaml") 22 | def check(config: Config): 23 | cfg = config.task 24 | logger.info(cfg.train_dataset) 25 | 26 | from utils.camera import DepthCamera, Realsense 27 | from functools import partial 28 | from utils import frame_utils 29 | sim_camera = DepthCamera.from_device("sim") 30 | # sim_camera.change_resolution(f"{config.image_size[1]}x{config.image_size[0]}") 31 | sim_camera.change_resolution(cfg.camera_resolution) 32 | disp_reader = partial(frame_utils.readDispReal, sim_camera) 33 | 34 | # sim_disp, sim_valid, min_disp, max_disp = 
disp_reader("datasets/HssdIsaacStd/train/102344049/kitchentable/1500_simDepthImage.exr") 35 | # sim_disp, sim_valid, min_disp, max_disp = disp_reader("datasets/HssdIsaacStd/train/102344049/kitchentable/1500_simDispImage.png") 36 | # raw_disp, raw_valid, min_disp, max_disp = disp_reader("datasets/HssdIsaacStd/train/102344049/kitchentable/1500_depth.exr") 37 | 38 | # epe = np.abs(sim_disp[sim_valid] - raw_disp[sim_valid]).mean() 39 | # assert epe < 1, f"bad quality sim disp, epe={epe}" 40 | 41 | train_dataloader, val_dataloader_lst = fetch_dataloader(cfg) 42 | logger.info(val_dataloader_lst[0].dataset.__class__.__name__) 43 | 44 | all_dataloaders = [train_dataloader] 45 | all_dataloaders.extend(val_dataloader_lst) 46 | bad = [] 47 | 48 | stats = { 49 | 'mean': [], 50 | 'med': [], 51 | 'min': [], 52 | 'max': [], 53 | 'std': [] 54 | } 55 | 56 | stats_norm = { 57 | 'mean': [], 58 | 'med': [], 59 | 'min': [], 60 | 'max': [], 61 | 'std': [] 62 | } 63 | count = 0 64 | 65 | norm = Normalizer.from_config(cfg) 66 | 67 | bads = {} 68 | 69 | for i, dataloader in enumerate(val_dataloader_lst): # all_dataloaders, [train_dataloader] 70 | pbar = tqdm(total=len(dataloader)) 71 | for j, data in enumerate(dataloader): 72 | # print(data.keys()) 73 | B = data['mask'].shape[0] 74 | for b in range(B): 75 | mask = data['mask'][b] 76 | # sim_mask = data['sim_mask'][b] 77 | 78 | disp = data['raw_disp'][b] 79 | disp_norm = data["normalized_disp"][b] 80 | # rgb = data['normalized_rgb'][b] 81 | index = data['index'][b] 82 | path = data['path'][b] 83 | 84 | # sim_disp = data["sim_disp_unnorm"][b] 85 | # sim_valid = data["sim_mask"][b].bool() 86 | 87 | stats['mean'].append(disp.mean().item()) 88 | stats['med'].append(disp.median().item()) 89 | stats['min'].append(disp.min().item()) 90 | stats['max'].append(disp.max().item()) 91 | stats['std'].append(disp.std().item()) 92 | 93 | stats_norm['mean'].append(disp_norm.mean().item()) 94 | stats_norm['med'].append(disp_norm.median().item()) 95 | stats_norm['min'].append(disp_norm.min().item()) 96 | stats_norm['max'].append(disp_norm.max().item()) 97 | stats_norm['std'].append(disp_norm.std().item()) 98 | 99 | # sim_disp, sim_valid, min_disp, max_disp = disp_reader("datasets/HssdIsaacStd/train/102344049/kitchentable/1500_simDepthImage.exr") 100 | # sim_disp, sim_valid, min_disp, max_disp = disp_reader("datasets/HssdIsaacStd/train/102344049/kitchentable/1500_simDispImage.png") 101 | # raw_disp, raw_valid, min_disp, max_disp = disp_reader("datasets/HssdIsaacStd/train/102344049/kitchentable/1500_depth.exr") 102 | 103 | # epe = torch.abs(sim_disp[sim_valid] - disp[sim_valid]).mean() 104 | if True: #&epe > 2.: 105 | # print(f"bad quality sim disp, epe={epe}, {data['path']}") 106 | # bads[data['path'][b]] = epe 107 | 108 | if "normalized_rgb" in data: 109 | rgb = data['normalized_rgb'][b:b+1] 110 | Image.fromarray(((rgb[0]+1) * 127.5).cpu().numpy().astype(np.uint8).transpose(1,2,0)).save(f"{index}_{j}_rgb.png") 111 | 112 | if True: 113 | left = data['left_image'][b:b+1] 114 | Image.fromarray(((left[0]+1) * 127.5).cpu().numpy().astype(np.uint8).transpose(1,2,0)).save(f"{index}_{j}_left.png") 115 | 116 | right = data['right_image'][b:b+1] 117 | Image.fromarray(((right[0]+1) * 127.5).cpu().numpy().astype(np.uint8).transpose(1,2,0)).save(f"{index}_{j}_right.png") 118 | 119 | H, W = disp.shape[-2:] 120 | device = left.device 121 | 122 | xx, yy = torch.meshgrid(torch.arange(W), torch.arange(H), indexing='xy') 123 | xx = xx.unsqueeze(0).repeat(1, 1, 1).to(device) 124 | yy = 
yy.unsqueeze(0).repeat(1, 1, 1).to(device) 125 | 126 | # raw_disp = data['raw_disp'][b] 127 | xx = (xx - disp) / ((W - 1) / 2.) - 1 128 | yy = yy / ((H - 1) / 2.) - 1 129 | grid = torch.stack((xx, yy), dim=-1) 130 | warp_left_image = F.grid_sample(right, grid, align_corners=True, mode="bilinear", padding_mode="border") 131 | warp_left_image[0][mask.repeat(3,1,1)<1.0] = -1 132 | Image.fromarray(((warp_left_image[0]+1) * 127.5).cpu().numpy().astype(np.uint8).transpose(1,2,0)).save(f"{index}_{j}_warped_right.png") 133 | loss = F.l1_loss(left[..., 0:], warp_left_image, reduction='mean') 134 | logger.info(f"raw disp loss: {loss.item()}") 135 | 136 | sim_disp = norm.denormalize(data["sim_disp"])[b] 137 | xx, yy = torch.meshgrid(torch.arange(W), torch.arange(H), indexing='xy') 138 | xx = xx.unsqueeze(0).repeat(B, 1, 1).to(device) 139 | yy = yy.unsqueeze(0).repeat(B, 1, 1).to(device) 140 | xx = (xx - sim_disp) / ((W - 1) / 2.) - 1 141 | yy = yy / ((H - 1) / 2.) - 1 142 | sim_grid = torch.stack((xx, yy), dim=-1) 143 | warp_left_image_sim = F.grid_sample(right, sim_grid, align_corners=True, mode="bilinear", padding_mode="border") 144 | # warp_left_image_sim[0][mask.repeat(3,1,1)<1.0] = -1 for sparse dataset 145 | warp_left_image_sim[0][mask.repeat(3,1,1)<1.0] = -1 146 | Image.fromarray(((warp_left_image_sim[0]+1) * 127.5).cpu().numpy().astype(np.uint8).transpose(1,2,0)).save(f"{index}_{j}_warped_right_sim.png") 147 | loss_sim = F.l1_loss(left[..., 0:], warp_left_image_sim, reduction='mean') 148 | logger.info(f"sim disp loss: {loss_sim.item()}") 149 | 150 | """ if True or mask.sum() / mask.numel() < 0.98: 151 | bad.append(path) 152 | logger.info(f"bad image {index}: {path}") 153 | 154 | if True: 155 | # low, high = torch.quantile(data['depth'][b], torch.tensor((0.02, 0.98))) # gt depth 156 | # d = (data['depth'][b] - low) / (high - low) 157 | # Image.fromarray(mask[0].cpu().numpy().astype(np.uint8)*255).save(f"{index}_mask.png") 158 | # Image.fromarray((d[0].clamp(0,1)*255).cpu().numpy().astype(np.uint8)).save(f"{index}_depth_p.png") 159 | Image.fromarray(((rgb+1) * 127.5).cpu().numpy().astype(np.uint8).transpose(1,2,0)).save(f"{index}_rgb.png") """ 160 | 161 | count += 1 162 | if count % 1000 == 0: 163 | print("stats_raw...") 164 | print(f"total={len(stats['mean'])}") 165 | for k, vals in stats.items(): 166 | print(f"{k}: {np.mean(vals)}") 167 | print("stats_norm...") 168 | for k, vals in stats_norm.items(): 169 | print(f"{k}: {np.mean(vals)}") 170 | 171 | # break 172 | # break 173 | pbar.update(1) 174 | 175 | print(f"total={len(stats['mean'])}") 176 | print("stats_raw...") 177 | for k, vals in stats.items(): 178 | print(f"{k}: {np.mean(vals)}") 179 | print("stats_norm...") 180 | for k, vals in stats_norm.items(): 181 | print(f"{k}: {np.mean(vals)}") 182 | 183 | # print("stats:", stats) 184 | logger.info(f"how many bad images? 
{len(bads.items())}") 185 | with open(f'bad_his.txt', 'w') as f: 186 | for path,epe in bads.items(): 187 | f.write(f"{path} {epe}\n") 188 | 189 | if __name__ == "__main__": 190 | 191 | seed_everything(0) 192 | setup_hydra_configurations() 193 | check() -------------------------------------------------------------------------------- /data/data_loader.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from .mono_datasets import * 3 | from .stereo_datasets import * 4 | from config import TrainingConfig 5 | from omegaconf import OmegaConf 6 | from torch.utils.data.dataset import ConcatDataset 7 | from utils.camera import Realsense, RGBDCamera 8 | 9 | def create_dataset(config: TrainingConfig, dataset_name, split = "train"): 10 | mono_lst = ['NYUv2', 'ScanNet', 'HyperSim', 'SceneNet', 'ScanNetpp', 'VK2', 'KITTI', "Middlebury", "InStereo2K", "Tartenair", "HRWSI", "SynTODD"] 11 | stereo_lst = ["Dreds", "Middlebury", "SceneFlow", "Real", "HISS", "ClearPose", "SynTODDRgbd", "Gapartnet2"] 12 | image_size = tuple(config.image_size) 13 | 14 | if len(dataset_name.split("_")) > 1: # Real_split_device 15 | dataset_name, split, device = dataset_name.split("_") 16 | 17 | from utils.utils import Normalizer 18 | normalizer = Normalizer.from_config(config) 19 | 20 | if dataset_name in stereo_lst: 21 | cam_res = [int(x) for x in config.camera_resolution.split("x")[::-1]] 22 | 23 | if split == "train": 24 | # dataset = eval(dataset_name)(f"datasets/{dataset_name}", split="train", image_size=config.image_size, augment=config.augment, camera = config.camera) 25 | aug_params = {"crop_size": image_size, 26 | "min_scale": config.augment["min_scale"], 27 | "max_scale": config.augment["max_scale"], 28 | "yjitter": config.augment["yjitter"]} 29 | aug_params["saturation_range"] = tuple(config.augment["saturation_range"]) 30 | aug_params["gamma"] = config.augment["gamma"] 31 | aug_params["do_flip"] = config.augment["hflip"] #config.augment["hflip"]["prob"] > 0.0 32 | # aug_params["camera_resolution"] = cam_res 33 | if dataset_name == 'SceneFlow': # BUG? min disp=0.5, max disp=192.0? 34 | disp_reader = partial(frame_utils.read_sceneflow, cam_res) 35 | clean_dataset = SceneFlow(aug_params=aug_params, root="datasets/sceneflow", dstype='frames_cleanpass', 36 | reader=disp_reader, normalizer=normalizer) 37 | final_dataset = SceneFlow(aug_params=aug_params, root="datasets/sceneflow", dstype='frames_finalpass', 38 | reader=disp_reader, normalizer=normalizer) 39 | dataset = clean_dataset + final_dataset 40 | elif dataset_name == 'HISS': 41 | sim_camera = DepthCamera.from_device("sim") # BUG? max depth=5. 42 | # sim_camera.change_resolution(f"{config.image_size[1]}x{config.image_size[0]}") 43 | sim_camera.change_resolution(config.camera_resolution) 44 | disp_reader = partial(frame_utils.readDispReal, sim_camera) 45 | dataset = HISS(sim_camera, normalizer, image_size, split, config.prediction_space, aug_params, reader=disp_reader) 46 | elif dataset_name == "Dreds": 47 | sim_camera = Realsense.default_sim() # BUG? max depth=2. 48 | # sim_camera.change_resolution(f"{image_size[1]}x{image_size[0]}") 49 | sim_camera.change_resolution(config.camera_resolution) 50 | # assert image_size == (126, 224) 51 | # disp_reader = partial(frame_utils.readDispDreds_exr, sim_camera) 52 | dataset = Dreds(sim_camera, normalizer, image_size, split, config.prediction_space, aug_params) 53 | elif dataset_name == "ClearPose": 54 | camera = RGBDCamera.default_clearpose() # BUG? max depth=5. 
55 | camera.change_resolution(config.camera_resolution) 56 | disp_reader = partial(frame_utils.readDispReal, camera) 57 | dataset = ClearPose(camera, normalizer, image_size, split, config.prediction_space, reader=disp_reader) 58 | elif dataset_name == "SynTODDRgbd": 59 | camera = RGBDCamera.default_syntodd() 60 | camera.change_resolution(config.camera_resolution) 61 | disp_reader = partial(frame_utils.readDispReal, camera) 62 | dataset = SynTODDRgbd(config.dataset_variant, camera, normalizer, image_size, split, config.prediction_space, reader=disp_reader) 63 | elif dataset_name == "Gapartnet2": 64 | sim_camera = Realsense.from_device("sim") 65 | sim_camera.change_resolution(config.camera_resolution) 66 | disp_reader = partial(frame_utils.readDispReal, sim_camera) 67 | dataset = Gapartnet2(sim_camera, normalizer, image_size, split, config.prediction_space, aug_params, reader=disp_reader) 68 | else: 69 | raise NotImplementedError 70 | 71 | else: 72 | if dataset_name == 'SceneFlow': 73 | disp_reader = partial(frame_utils.read_sceneflow, cam_res) 74 | dataset = SceneFlow(root="datasets/sceneflow", dstype='frames_cleanpass', things_test=True, 75 | reader=disp_reader, normalizer=normalizer) 76 | elif dataset_name == "HISS": 77 | sim_camera = Realsense.from_device("sim") 78 | sim_camera.change_resolution(f"{config.image_size[1]}x{config.image_size[0]}") 79 | disp_reader = partial(frame_utils.readDispReal, sim_camera) 80 | dataset = HISS(sim_camera, normalizer, image_size, split, space=config.prediction_space, reader=disp_reader) 81 | elif dataset_name == "Dreds": 82 | sim_camera = Realsense.default_sim() 83 | sim_camera.change_resolution(f"{image_size[1]}x{image_size[0]}") 84 | # assert image_size == (126, 224) # reprod dreds-1.0 85 | # disp_reader = partial(frame_utils.readDispDreds_exr, sim_camera) 86 | dataset = Dreds(sim_camera, normalizer, image_size, split, space=config.prediction_space) 87 | elif dataset_name == "Real": 88 | real_cam = Realsense.default_real("fxm") 89 | real_cam.change_resolution(f"{config.image_size[1]}x{config.image_size[0]}") 90 | dataset = Real(camera=real_cam, normalizer=normalizer, 91 | image_size=image_size, scene=split, space=config.prediction_space) 92 | elif dataset_name == "ClearPose": 93 | camera = RGBDCamera.default_clearpose() 94 | camera.change_resolution(f"{image_size[1]}x{image_size[0]}") 95 | disp_reader = partial(frame_utils.readDispReal, camera) 96 | dataset = ClearPose(camera, normalizer, image_size, split, config.prediction_space, reader=disp_reader) 97 | elif dataset_name == "SynTODDRgbd": 98 | camera = RGBDCamera.default_syntodd() 99 | camera.change_resolution(f"{image_size[1]}x{image_size[0]}") 100 | disp_reader = partial(frame_utils.readDispReal, camera) 101 | dataset = SynTODDRgbd(config.dataset_variant, camera, normalizer, image_size, split, config.prediction_space, reader=disp_reader) 102 | elif dataset_name == "Gapartnet2": 103 | sim_camera = Realsense.from_device("sim") 104 | sim_camera.change_resolution(f"{config.image_size[1]}x{config.image_size[0]}") 105 | disp_reader = partial(frame_utils.readDispReal, sim_camera) 106 | dataset = Gapartnet2(sim_camera, normalizer, image_size, split, space=config.prediction_space, reader=disp_reader) 107 | 108 | else: 109 | raise NotImplementedError 110 | 111 | elif dataset_name in mono_lst: 112 | if split == "train": 113 | dataset= eval(dataset_name)(f"datasets/{dataset_name}", split="train", image_size=image_size, augment=config.augment) 114 | else: 115 | dataset = 
eval(dataset_name)(f"datasets/{dataset_name}", split=split, image_size=image_size, augment=None) 116 | else: 117 | raise NotImplementedError 118 | return dataset 119 | 120 | def fetch_dataloader(config: TrainingConfig): 121 | """ Create the data loader for the corresponding training set """ 122 | 123 | """ if not isinstance(config.dataset, List): 124 | dataset_lst = [config.dataset] 125 | else: 126 | dataset_lst = config.dataset 127 | 128 | if not isinstance(config.dataset_weight, List): 129 | weight_lst = [config.dataset_weight] 130 | else: 131 | weight_lst = config.dataset_weight """ 132 | 133 | assert len(config.train_dataset) == len(config.dataset_weight) 134 | 135 | val_loader_lst = [] 136 | train_dataset = None 137 | for i, dataset_name in enumerate(config.train_dataset): 138 | new_dataset = create_dataset(config, dataset_name, split = "train") 139 | 140 | # multiple dataset weights 141 | if type(new_dataset) == ConcatDataset: 142 | # hack: ConcatDataset does not support '*', so rebuild it with a repeated dataset list 143 | repeats = max(1, int(config.dataset_weight[i])) 144 | new_dataset = ConcatDataset(new_dataset.datasets * repeats) 145 | else: 146 | new_dataset = new_dataset * config.dataset_weight[i] 147 | 148 | # add train dataset together 149 | train_dataset = new_dataset if train_dataset is None else train_dataset + new_dataset 150 | 151 | for i, dataset_name in enumerate(config.eval_dataset): 152 | # separately evaluate each dataset 153 | val_dataset = create_dataset(config, dataset_name, split = "val") 154 | val_dataloader = torch.utils.data.DataLoader(val_dataset, 155 | batch_size=config.eval_batch_size, 156 | shuffle=True, 157 | pin_memory=False, 158 | drop_last=False) 159 | val_loader_lst.append(val_dataloader) 160 | 161 | train_dataloader = torch.utils.data.DataLoader(train_dataset, 162 | batch_size=config.train_batch_size, 163 | shuffle=True, 164 | pin_memory=False, 165 | num_workers=int(os.environ.get('SLURM_CPUS_PER_TASK', 6))-2, 166 | drop_last=True) 167 | 168 | logging.info('Training with %d image pairs' % len(train_dataset)) 169 | return train_dataloader, val_loader_lst 170 | 171 | -------------------------------------------------------------------------------- /isaacsim/replicator.py: -------------------------------------------------------------------------------- 1 | 2 | import os, sys 3 | import csv, copy, math 4 | import time, json 5 | import numpy as np 6 | import random 7 | import transforms3d as t3d 8 | # from scipy.spatial.transform import Rotation 9 | from typing import Union, Type, List 10 | from functools import partial 11 | from PIL import Image 12 | 13 | import carb 14 | import omni.replicator.core as rep 15 | import omni.usd 16 | from omni.isaac.kit import SimulationApp 17 | 18 | from omni.isaac.core.utils.nucleus import get_assets_root_path 19 | 20 | from omni.isaac.core.utils.bounds import compute_combined_aabb, create_bbox_cache 21 | from omni.isaac.core import World 22 | from omni.isaac.core.utils.stage import add_reference_to_stage 23 | from omni.replicator.core import Writer, AnnotatorRegistry 24 | from omni.isaac.core.utils.rotations import euler_angles_to_quat, quat_to_euler_angles 25 | from omni.isaac.core.objects import DynamicCuboid 26 | from pxr import Gf, Sdf, Usd, PhysxSchema, UsdGeom, UsdLux, UsdPhysics, UsdShade 27 | 28 | # import offline_generation_utils 29 | from hydra.utils import get_original_cwd, to_absolute_path 30 | from omegaconf import DictConfig 31 | 32 | from custom_writer import ColorWriter, GtWriter, IRWriter 33 | from omni.replicator.core import WriterRegistry
34 | from replicate import Replicator 35 | 36 | scene_prim_path = "/World/scene" #!! 37 | 38 | class IRReplicator: 39 | def __init__(self, app: SimulationApp, world: World, config:DictConfig) -> None: 40 | self._app = app 41 | self._world = world 42 | self._config = config 43 | self._log = self._app.app.print_and_log 44 | 45 | # Get server path 46 | # self.assets_root_path = get_assets_root_path() 47 | # if self.assets_root_path is None: 48 | # carb.log_error("Could not get nucleus server path, closing application..") 49 | # app.close() 50 | 51 | # load different scene replicator according to configuration 52 | self.replicator = Replicator.factory(world, config) 53 | 54 | # self._light: Usd.Prim = self.setup_lighting() 55 | 56 | self._scene: Usd.Prim = self.load_scene() 57 | # self._world.scene.add_default_ground_plane() 58 | """ self.scene = UsdPhysics.Scene.Define(self._world.stage, Sdf.Path("/physicsScene")) 59 | self.scene.CreateGravityDirectionAttr().Set(Gf.Vec3f(0.0, 0.0, -1.0)) 60 | self.scene.CreateGravityMagnitudeAttr().Set(9.81) 61 | omni.kit.commands.execute( 62 | "AddGroundPlaneCommand", 63 | stage=self._world.stage, 64 | planePath="/groundPlane", 65 | axis="Z", 66 | size=10.000, 67 | position=Gf.Vec3f(0, 0, -0.01), # hack to hide ground mesh 68 | color=Gf.Vec3f(0.5), 69 | ) """ 70 | 71 | # self._mats = self.load_materials() 72 | 73 | # Disable capture on play and async rendering 74 | carb.settings.get_settings().set("/omni/replicator/captureOnPlay", False) 75 | carb.settings.get_settings().set("/omni/replicator/asyncRendering", False) 76 | carb.settings.get_settings().set("/app/asyncRendering", False) 77 | 78 | # https://forums.developer.nvidia.com/t/replicator-images-contain-artifacts-from-other-frames/220837 79 | # carb.settings.get_settings().set("/rtx/ambientOcclusion/enabled", False) 80 | # rep.settings.set_render_rtx_realtime(antialiasing="FXAA") 81 | 82 | # start replicator 83 | if self._config["rt_subframes"] > 1: 84 | rep.settings.carb_settings("/omni/replicator/RTSubframes", self._config["rt_subframes"]) 85 | else: 86 | carb.log_warn("RTSubframes is set to 1, consider increasing it if materials are not loaded on time") 87 | 88 | self.clear_previous_semantics() 89 | 90 | self.output_dir = os.path.join(os.path.dirname(__file__), config["writer_config"]["output_dir"]) 91 | if not os.path.exists(self.output_dir): 92 | os.makedirs(self.output_dir) 93 | 94 | self.replicator.setup_depth_sensor() 95 | 96 | WriterRegistry.register(ColorWriter) 97 | WriterRegistry.register(GtWriter) 98 | WriterRegistry.register(IRWriter) 99 | 100 | self.dr = self.replicator.setup_domain_randomization() 101 | self._log(json.dumps(self.dr)) 102 | 103 | def clear_previous_semantics(self): 104 | return 105 | if self._config["clear_previous_semantics"]: 106 | offline_generation_utils.remove_previous_semantics(self._world.stage) 107 | 108 | 109 | def setup_lighting(self): 110 | # prim_path = "/World/DiskLight" 111 | # diskLight = UsdLux.DiskLight.Define(self._world.stage, Sdf.Path(prim_path)) 112 | # diskLight.CreateIntensityAttr(15000) 113 | 114 | # light = self._world.stage.GetPrimAtPath(prim_path) 115 | # if not light.GetAttribute("xformOp:translate"): 116 | # UsdGeom.Xformable(light).AddTranslateOp() 117 | # return light 118 | pass 119 | 120 | # def setup_projector_lighting(self): 121 | # prim_path = "/World/RectLight" 122 | # rectLight = UsdLux.RectLight.Define(self._world.stage, Sdf.Path(prim_path)) 123 | # rectLight.CreateIntensityAttr(500) 124 | # rectLight.Create 125 | 126 | def 
load_scene(self): 127 | scene_name = self._config["hssd"]["name"] 128 | data_dir = os.path.abspath(self._config.hssd["data_dir"]) 129 | env_url = f"{data_dir}/{scene_name}/{scene_name}.usd" 130 | assert os.path.exists(env_url), f"Scene file {env_url} does not exist" 131 | add_reference_to_stage(usd_path=env_url, prim_path=scene_prim_path) 132 | 133 | hssd_env = self._world.stage.GetPrimAtPath(scene_prim_path) 134 | if not hssd_env.GetAttribute("xformOp:translate"): 135 | UsdGeom.Xformable(hssd_env).AddTranslateOp() 136 | if not hssd_env.GetAttribute("xformOp:rotateXYZ"): 137 | UsdGeom.Xformable(hssd_env).AddRotateXYZOp() 138 | if not hssd_env.GetAttribute("xformOp:scale"): 139 | UsdGeom.Xformable(hssd_env).AddScaleOp() 140 | 141 | hssd_env.GetAttribute("xformOp:rotateXYZ").Set((90, 0, 0)) 142 | scale = self._config["hssd"]["scale"] 143 | hssd_env.GetAttribute("xformOp:scale").Set((scale, scale, scale)) 144 | 145 | if self._config["hssd"]["hide_ceilings"]: 146 | ceiling = hssd_env.GetPrimAtPath(f"{scene_prim_path}/ceilings") 147 | ceiling.GetAttribute("visibility").Set("invisible") 148 | 149 | if self._config["hssd"]["hide_walls"]: # an ugly hack 150 | walls = hssd_env.GetPrimAtPath(f"{scene_prim_path}/walls") 151 | walls.GetAttribute("visibility").Set("invisible") 152 | 153 | return hssd_env 154 | 155 | # deprecated 156 | def load_materials(self): 157 | #https://forums.developer.nvidia.com/t/how-can-i-change-material-of-the-existing-object-in-runtime/161253 158 | # path_mat_glass_clear = assets_root_path + "/NVIDIA/Materials/vMaterials_2/Glass/Glass_Clear.mdl" 159 | path_mat_glass_clear = "omniverse://localhost/NVIDIA/Materials/vMaterials_2/Glass/Glass_Clear.mdl" 160 | # load more 161 | success, result = omni.kit.commands.execute('CreateMdlMaterialPrimCommand', 162 | mtl_url=path_mat_glass_clear, # This can be path to local or remote MDL 163 | mtl_name='Glass_Clear', # sourceAsset:subIdentifier (i.e. the name of the material within the MDL) 164 | mtl_path="/World/Looks/Glass_Clear" # Prim path for the Material to create. 165 | ) 166 | t = UsdShade.Material(self._world.stage.GetPrimAtPath("/World/Looks/Glass_Clear")) 167 | 168 | path_mat_metal_aluminum = "omniverse://localhost/NVIDIA/Materials/vMaterials_2/Metal/Aluminum.mdl" 169 | success, result = omni.kit.commands.execute('CreateMdlMaterialPrimCommand', 170 | mtl_url=path_mat_glass_clear, # This can be path to local or remote MDL 171 | mtl_name='Aluminum', 172 | mtl_path="/World/Looks/Aluminum" # Prim path for the Material to create. 
173 | ) 174 | s = UsdShade.Material(self._world.stage.GetPrimAtPath("/World/Looks/Aluminum")) 175 | 176 | return { 177 | 'transparent': [t], # TODO add more 178 | 'specular': [s] # TODO add more 179 | } 180 | 181 | # deprecated 182 | def create_rep_object(self, surface_center_pos): 183 | test_model = rep.create.from_usd(f"file:///home/songlin/Projects/DREDS/DepthSensorSimulator/cad_model/02691156/1c93b0eb9c313f5d9a6e43b878d5b335_converted/model_obj.usd", 184 | semantics=[("class", "test")]) 185 | 186 | test_ball = rep.create.sphere(name="test_ball", position=surface_center_pos, scale=(0.1, 0.1, 0.1)) 187 | with test_model: 188 | rep.physics.collider() 189 | rep.physics.rigid_body( 190 | # velocity=rep.distribution.uniform((-0,0,-0),(0,0,1)), 191 | # angular_velocity=rep.distribution.uniform((-0,0,-100),(0,0,0)) 192 | ) 193 | 194 | 195 | 196 | def start(self): 197 | # self.debug = 0 198 | # Find the desired surface 199 | # for surface_config in self._config["hssd"]['surfaces']: 200 | # surface = self._config["hssd"]['surface'] 201 | self.replicator.render() 202 | 203 | """ def randomize_texture(self, dred_models): 204 | materials = create_materials(self._world.stage, len(dred_models)) 205 | assets_root_path = get_assets_root_path() 206 | textures = [ 207 | assets_root_path + "/NVIDIA/Materials/vMaterials_2/Ground/textures/aggregate_exposed_diff.jpg", 208 | assets_root_path + "/NVIDIA/Materials/vMaterials_2/Ground/textures/gravel_track_ballast_diff.jpg", 209 | assets_root_path + "/NVIDIA/Materials/vMaterials_2/Ground/textures/gravel_track_ballast_multi_R_rough_G_ao.jpg", 210 | assets_root_path + "/NVIDIA/Materials/vMaterials_2/Ground/textures/rough_gravel_rough.jpg", 211 | ] 212 | 213 | delay=0.2 214 | initial_materials = {} 215 | for i, shape in dred_models.items(): #enumerate(): 216 | cur_mat, _ = UsdShade.MaterialBindingAPI(shape).ComputeBoundMaterial() 217 | initial_materials[shape] = cur_mat 218 | UsdShade.MaterialBindingAPI(shape).Bind(materials[i-1], UsdShade.Tokens.strongerThanDescendants) 219 | 220 | for mat in materials: 221 | shader = UsdShade.Shader(omni.usd.get_shader_from_material(mat, get_prim=True)) 222 | # diffuse_texture = np.random.choice(textures) 223 | # shader.GetInput("diffuse_texture").Set(diffuse_texture) 224 | 225 | # project_uvw = np.random.choice([True, False], p=[0.9, 0.1]) 226 | # shader.GetInput("project_uvw").Set(bool(project_uvw)) 227 | 228 | # texture_scale = np.random.uniform(0.1, 1) 229 | # shader.GetInput("texture_scale").Set((texture_scale, texture_scale)) 230 | 231 | # texture_rotate = np.random.uniform(0, 45) 232 | # shader.GetInput("texture_rotate").Set(texture_rotate) 233 | 234 | shader.GetInput("metallic_constant").Set(1.0) 235 | shader.GetInput("reflection_roughness_constant").Set(0.0) """ 236 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
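A minimal usage sketch for `init_obj` in `core/praser.py`, which resolves a config entry to a class or function via `importlib`. The module path `models.network` and class name `Network` below are hypothetical placeholders, not names from this repository:

```
import logging
from core.praser import init_obj

logger = logging.getLogger(__name__)

# 'name' is either a bare class name (looked up in default_file_name)
# or a [module_path, class_name] pair; 'args' becomes keyword arguments.
opt = {
    "name": ["models.network", "Network"],  # hypothetical module and class
    "args": {"in_channels": 3},
}
net = init_obj(opt, logger, init_type="Network")
```

When `name` is a plain string, the class is instead looked up in the module given by `default_file_name`.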
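The samplers in `core/resample.py` importance-sample diffusion timesteps: `sample()` draws indices t with probability p_t proportional to `weights()` and returns per-sample weights 1/(T p_t), so the weighted loss remains an unbiased estimate of the uniform-timestep objective. A minimal single-process sketch with `LossSecondMomentResampler`; the per-sample loss here is a random stand-in for the real diffusion loss:

```
import torch
from core.resample import LossSecondMomentResampler

sampler = LossSecondMomentResampler(T=1000)

for _ in range(100):
    t, w = sampler.sample(batch_size=8, device="cpu")
    per_sample_loss = torch.rand(8)          # stand-in for the per-timestep diffusion loss
    loss = (w * per_sample_loss).mean()      # importance-weighted, unbiased objective
    # Feed raw losses back; once every t has history_per_term entries,
    # weights become proportional to sqrt(E[loss_t^2]) plus a small uniform floor.
    sampler.update_with_all_losses(t.tolist(), per_sample_loss.tolist())
```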
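`utils/ransac.py` and the evaluation code rely on `compute_scale_and_shift` from `utils/utils.py`, which is not included in this listing. Assuming it is the usual masked least-squares alignment for affine-invariant predictions (as popularized by MiDaS), the per-image scale s and shift t solve:

```
\min_{s,t}\ \sum_i m_i\,(s\,x_i + t - y_i)^2
\;\Longrightarrow\;
\begin{pmatrix} s \\ t \end{pmatrix} =
\begin{pmatrix} \sum_i m_i x_i^2 & \sum_i m_i x_i \\ \sum_i m_i x_i & \sum_i m_i \end{pmatrix}^{-1}
\begin{pmatrix} \sum_i m_i x_i y_i \\ \sum_i m_i y_i \end{pmatrix}
```

where m_i masks valid pixels, x_i is the prediction and y_i the target; the `RANSAC` class in `utils/ransac.py` repeatedly re-estimates (s, t) on sampled inlier sets to stay robust to outliers.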
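A usage sketch for the batched `RANSAC` estimator in `utils/ransac.py`, for example to align an affine-invariant disparity prediction to raw sensor disparity. Inputs are flattened `(B, H*W)` tensors; after `fit`, `best_fit` holds one `(scale, shift)` pair per image, which is applied directly (the `predict` method expects a fitted model object rather than this parameter tensor, so the explicit form below is safer). The target here is synthetic stand-in data:

```
import torch
from utils.ransac import RANSAC

B, HW = 2, 180 * 320
pred = torch.rand(B, HW)          # e.g. affine-invariant disparity prediction
raw = 2.0 * pred + 0.5            # stand-in for raw sensor disparity
mask = torch.ones(B, HW)          # 1 where the raw disparity is valid

ransac = RANSAC(n=0.1, k=20, t=0.05, d=0.5)
ransac.fit(pred, raw, mask)

scale, shift = ransac.best_fit[:, 0:1], ransac.best_fit[:, 1:]
aligned = pred * scale + shift    # prediction aligned to the raw metric scale
```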
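The photometric check in `scripts/check_stereo.py` warps the right image into the left view: a left pixel (x, y) with disparity d samples the right image at (x - d, y), and both coordinates are normalized to the [-1, 1] range `F.grid_sample` expects via x -> x / ((W - 1) / 2) - 1. A condensed sketch of that warp as a standalone function:

```
import torch
import torch.nn.functional as F

def warp_right_to_left(right: torch.Tensor, disp: torch.Tensor) -> torch.Tensor:
    """right: (B, 3, H, W) image, disp: (B, H, W) left-view disparity in pixels."""
    B, _, H, W = right.shape
    xx, yy = torch.meshgrid(torch.arange(W), torch.arange(H), indexing="xy")
    xx = xx.to(right).expand(B, H, W)
    yy = yy.to(right).expand(B, H, W)
    xx = (xx - disp) / ((W - 1) / 2.0) - 1   # shift by disparity, normalize to [-1, 1]
    yy = yy / ((H - 1) / 2.0) - 1
    grid = torch.stack((xx, yy), dim=-1)     # (B, H, W, 2) sampling grid
    return F.grid_sample(right, grid, mode="bilinear",
                         padding_mode="border", align_corners=True)
```

If the disparity is accurate, the warped right image matches the left image on valid, non-occluded pixels, which is what the script's L1 losses measure.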
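`get_obb_corners` in `isaacsim/utils_func.py` enumerates the eight corners of an oriented bounding box as centroid +/- axes[i] * half_extent[i] over all sign combinations. The same arithmetic can be checked with plain NumPy, with no Isaac Sim dependency; the loop below reproduces the source's corner ordering:

```
import numpy as np
from itertools import product

centroid = np.zeros(3)
axes = np.eye(3)                        # rows are the (unit) OBB axes
half_extent = np.array([0.5, 0.5, 0.5])

corners = np.array([
    centroid
    + sx * axes[0] * half_extent[0]
    + sy * axes[1] * half_extent[1]
    + sz * axes[2] * half_extent[2]
    for sx, sy, sz in product((-1, 1), repeat=3)
])
print(corners)  # the eight corners of a unit cube centered at the origin
```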