├── utils ├── __init__.py ├── losess.py ├── ransac.py └── frame_utils.py ├── isaacsim ├── .gitignore ├── requirements.txt ├── pattern.png ├── replicate │ ├── __init__.py │ └── std_object.py ├── render.py ├── README.md ├── config │ └── hssd.yaml ├── utils_func.py ├── replicator.py └── custom_writer.py ├── datasets ├── .gitignore ├── Real │ └── xiaomeng │ │ ├── 0000_ir_l.png │ │ ├── 0000_ir_r.png │ │ ├── 0000_rgb.png │ │ ├── 0000_depth.png │ │ └── 0000_raw_disparity.png └── README.md ├── raw_aligned.png ├── assets ├── in-the-wild.png └── examples │ ├── 0000_ir_l.png │ ├── 0000_ir_r.png │ ├── 0000_rgb.png │ └── 0000_depth.png ├── .gitignore ├── conf ├── config.yaml └── task │ ├── eval_ldm_his.yaml │ ├── eval_his_sim.yaml │ ├── eval_ldm_mixed.yaml │ ├── eval_dreds_reprod.yaml │ ├── eval_ldm_mixed_rgb+raw.yaml │ ├── eval_ldm_mixed_cond_rgbd.yaml │ ├── eval_clearpose.yaml │ ├── eval_syntodd_rgbd.yaml │ ├── eval_sceneflow.yaml │ ├── eval_ldm_mono.yaml │ ├── eval_ldm.yaml │ ├── train_ldm_mixed.yaml │ ├── train_ldm_mono.yaml │ ├── train_sceneflow.yaml │ ├── train_hiss.yaml │ ├── train_ldm_mixed_rgb+raw.yaml │ ├── train_dreds_reprod.yaml │ ├── train_ldm_mixed_left+right+raw.yaml │ ├── train_ldm_mixed_cond_rgbd.yaml │ ├── train_clearpose.yaml │ ├── train_ldm_mixed_gapartnet.yaml │ └── train_syntodd_rgbd.yaml ├── pyrightconfig.json ├── scripts ├── check_sceneflow.py └── check_stereo.py ├── data ├── dataset.py ├── data_loader.py └── augmentor.py ├── core ├── praser.py └── resample.py ├── distributed_evaluate.py ├── README.md ├── config.py └── inference.py /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /isaacsim/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | output_ir 3 | **/*.pyc -------------------------------------------------------------------------------- /isaacsim/requirements.txt: -------------------------------------------------------------------------------- 1 | hydra-core==1.3.2 2 | transforms3d -------------------------------------------------------------------------------- /datasets/.gitignore: -------------------------------------------------------------------------------- 1 | clearpose** 2 | DREDS** 3 | HISS** 4 | sceneflow** -------------------------------------------------------------------------------- /raw_aligned.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/raw_aligned.png -------------------------------------------------------------------------------- /isaacsim/pattern.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/isaacsim/pattern.png -------------------------------------------------------------------------------- /assets/in-the-wild.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/assets/in-the-wild.png -------------------------------------------------------------------------------- /assets/examples/0000_ir_l.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/assets/examples/0000_ir_l.png -------------------------------------------------------------------------------- /assets/examples/0000_ir_r.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/assets/examples/0000_ir_r.png -------------------------------------------------------------------------------- /assets/examples/0000_rgb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/assets/examples/0000_rgb.png -------------------------------------------------------------------------------- /assets/examples/0000_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/assets/examples/0000_depth.png -------------------------------------------------------------------------------- /datasets/Real/xiaomeng/0000_ir_l.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/datasets/Real/xiaomeng/0000_ir_l.png -------------------------------------------------------------------------------- /datasets/Real/xiaomeng/0000_ir_r.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/datasets/Real/xiaomeng/0000_ir_r.png -------------------------------------------------------------------------------- /datasets/Real/xiaomeng/0000_rgb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/datasets/Real/xiaomeng/0000_rgb.png -------------------------------------------------------------------------------- /datasets/Real/xiaomeng/0000_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/datasets/Real/xiaomeng/0000_depth.png -------------------------------------------------------------------------------- /isaacsim/replicate/__init__.py: -------------------------------------------------------------------------------- 1 | from .scene_replicator import Replicator 2 | from .std_object import STDObjectReplicator 3 | 4 | -------------------------------------------------------------------------------- /datasets/Real/xiaomeng/0000_raw_disparity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/songlin/d3roma/HEAD/datasets/Real/xiaomeng/0000_raw_disparity.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/*.pyc 2 | experiments* 3 | checkpoint 4 | _outputs* 5 | _outputs/** 6 | checkpoint/** 7 | test_* 8 | backup 9 | bad_sim* 10 | .vscode 11 | -------------------------------------------------------------------------------- /conf/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | defaults: 3 | - _self_ 4 | - task: train_ldm 5 | 6 | debug: false 7 | seed: -1 8 | 9 | hydra: 10 | run: 11 | dir: _outputs/${hydra.job.name} 12 | -------------------------------------------------------------------------------- /conf/task/eval_ldm_his.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_ldm_his 3 | 4 | name: ldm_his 5 | resume_pretrained: 6 | camera_resolution: 640x360 # W,H 7 | image_size: [180, 320] # H,W 8 | eval_dataset: [HISS] 9 | eval_num_batch: -1 10 | 
eval_batch_size: 4 11 | num_inference_timesteps: 10 12 | num_intermediate_images: 5 13 | num_inference_rounds: 1 14 | 15 | -------------------------------------------------------------------------------- /conf/task/eval_his_sim.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_his_sim 3 | 4 | resume_pretrained: 5 | camera_resolution: 224x126 # WxH 6 | image_size: [126, 224] # H,W 7 | safe_ssi: true 8 | eval_dataset: [HISS] 9 | eval_num_batch: -1 10 | eval_batch_size: 32 11 | sampler: my_ddpm 12 | num_inference_timesteps: 128 13 | num_intermediate_images: 8 14 | num_inference_rounds: 1 15 | write_pcd: true -------------------------------------------------------------------------------- /conf/task/eval_ldm_mixed.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_ldm_mixed 3 | 4 | # was the best version in real during the submition to CoRL 2024 5 | name: eval_ldm_sf 6 | resume_pretrained: 7 | camera_resolution: 480x270 # W,H 8 | image_size: [180,320] # H,W 9 | eval_dataset: [Real_xiaomeng_fxm] 10 | eval_num_batch: -1 11 | eval_batch_size: 4 12 | num_inference_timesteps: 10 13 | num_intermediate_images: 5 14 | num_inference_rounds: 1 15 | -------------------------------------------------------------------------------- /conf/task/eval_dreds_reprod.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_dreds_reprod 3 | 4 | name: dreds 5 | resume_pretrained: 6 | cond_channels: left+right+raw 7 | camera_resolution: 224x126 # WxH 8 | image_size: [126, 224] # H,W 9 | safe_ssi: true 10 | train_dataset: [Dreds] 11 | eval_dataset: [Dreds] 12 | eval_num_batch: -1 13 | eval_batch_size: 32 14 | save_model_epochs: 5 15 | num_inference_timesteps: 128 16 | num_intermediate_images: 8 17 | sampler: my_ddpm 18 | -------------------------------------------------------------------------------- /conf/task/eval_ldm_mixed_rgb+raw.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_ldm_mixed_rgb+raw 3 | 4 | # was the best version in real during the submition to CoRL 2024 5 | name: eval_ldm_mixed_rgb+raw 6 | resume_pretrained: 7 | camera_resolution: 480x270 # W,H 8 | image_size: [180,320] # H,W 9 | eval_dataset: [Real_xiaomeng_fxm] 10 | eval_num_batch: -1 11 | eval_batch_size: 4 12 | num_inference_timesteps: 10 13 | num_intermediate_images: 5 14 | num_inference_rounds: 1 15 | -------------------------------------------------------------------------------- /conf/task/eval_ldm_mixed_cond_rgbd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_ldm_mixed_cond_rgbd 3 | 4 | name: ldm_sf 5 | resume_pretrained: experiments/ldm_sf-0807.dep4.lr3e-05.v_prediction.nossi.scaled_linear.randn.ddpm1000.ClearPose_Dreds_HISS.240x320.rgb+raw.w0.0/best 6 | camera_resolution: 320x240 # WxH 7 | image_size: [240,320] # H,W 8 | eval_dataset: [ClearPose] 9 | eval_num_batch: -1 10 | sampler: ddim 11 | num_inference_timesteps: 10 12 | num_intermediate_images: 5 13 | num_inference_rounds: 1 -------------------------------------------------------------------------------- /conf/task/eval_clearpose.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_clearpose 3 | 4 | name: clearpose 5 | resume_pretrained: 
experiments/clearpose-0809.dep1.lr1e-04.sample.ssi.squaredcos_cap_v2.pyramid.my_ddpm128.ClearPose_Dreds_HISS.240x320.rgb+raw.w0.0/best 6 | eval_num_batch: -1 7 | camera_resolution: 320x240 # WxH 8 | image_size: [240,320] # H,W 9 | eval_dataset: [ClearPose] 10 | num_intermediate_images: 8 11 | sampler: my_ddpm 12 | plot_error_map: false 13 | plot_denoised_images: false 14 | eval_batch_size: 96 15 | eval_split: "test" 16 | safe_ssi: false 17 | -------------------------------------------------------------------------------- /conf/task/eval_syntodd_rgbd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_syntodd_rgbd 3 | 4 | name: clearpose 5 | resume_pretrained: experiments/syntodd_rgbd-0810.dep1.lr1e-04.sample.ssi.squaredcos_cap_v2.pyramid.my_ddpm128.SynTODDRgbd.240x320.rgb+raw.w0.0/best 6 | eval_num_batch: -1 7 | camera_resolution: 320x240 # WxH 8 | image_size: [240,320] # H,W 9 | eval_dataset: [SynTODDRgbd] 10 | num_intermediate_images: 8 11 | sampler: my_ddpm 12 | plot_error_map: false 13 | plot_denoised_images: false 14 | eval_batch_size: 12 15 | eval_split: "test" 16 | safe_ssi: false 17 | -------------------------------------------------------------------------------- /conf/task/eval_sceneflow.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_sceneflow 3 | 4 | name: eval_sceneflow 5 | resume_pretrained: 6 | eval_dataset: [SceneFlow] 7 | eval_split: val 8 | camera_resolution: 960x540 9 | image_size: [540, 960] 10 | eval_num_batch: -1 11 | eval_batch_size: 3 12 | eval_output: "" # use default 13 | prediction_type: sample 14 | flow_guidance_mode: imputation 15 | flow_guidance_weights: [0] 16 | num_inference_rounds: 1 17 | num_inference_timesteps: 10 18 | num_intermediate_images: 5 19 | plot_denoised_images: true 20 | plot_intermediate_metrics: false 21 | write_pcd: false 22 | plot_error_map: true 23 | ensemble: false 24 | ssi: false -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "exclude": ["datasets", "experiments", "experiments.corl24", "checkpoint", "_outputs"], 3 | "reportPrivateImportUsage": false, 4 | "reportOptionalMemberAccess": false, 5 | "reportCallIssue": false, 6 | "reportPossiblyUnboundVariable": false, 7 | "reportArgumentType": false, 8 | "reportOptionalSubscript": false, 9 | "reportAttributeAccessIssue": false, 10 | "reportOptionalOperand": false, 11 | "reportIndexIssue": false, 12 | "reportAssignmentType": false, 13 | "reportOperatorIssue": false, 14 | "reportReturnType": false, 15 | "reportGeneralTypeIssues": false 16 | } -------------------------------------------------------------------------------- /conf/task/eval_ldm_mono.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_ldm_mono 3 | 4 | name: eval_ldm_mono 5 | resume_pretrained: experiments/ldm_mono-0809.dep4.lr3e-05.v_prediction.ssi.scaled_linear.randn.ddpm1000.SynTODD.240x320.rgb.w0.0/best 6 | eval_dataset: [SynTODD] 7 | eval_split: test 8 | # camera_resolution: 640x480 9 | # image_size: [480, 640] 10 | eval_num_batch: -1 11 | eval_batch_size: 16 12 | num_inference_rounds: 1 13 | num_inference_timesteps: 10 14 | num_intermediate_images: 5 15 | plot_denoised_images: false 16 | plot_error_map: true 17 | write_pcd: false 18 | # ensemble: false 19 | # safe_ssi: true 20 | # 
ransac_error_threshold: 0.6 # rmse error, 0.6 for nyu 21 | 22 | -------------------------------------------------------------------------------- /conf/task/eval_ldm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - train_ldm 3 | 4 | name: eval_ft_sd2_hypersim 5 | resume_pretrained: experiments/d.fixed.lr3e-05.v_prediction.ssi.scaled_linear.randn.ssi.my_ddpm1000.HyperSim.240x320.cond4.w0.0/epoch_0038 6 | # train_dataset: [HyperSim] 7 | eval_dataset: [NYUv2] 8 | eval_split: val 9 | camera_resolution: 640x480 10 | image_size: [480, 640] 11 | eval_num_batch: -1 12 | eval_batch_size: 3 13 | eval_output: "" # use default 14 | flow_guidance_mode: imputation 15 | flow_guidance_weights: [0] 16 | num_inference_rounds: 1 17 | num_inference_timesteps: 10 18 | num_intermediate_images: 5 19 | plot_denoised_images: true 20 | write_pcd: false 21 | plot_error_map: true 22 | ensemble: false 23 | # safe_ssi: true 24 | # ransac_error_threshold: 0.6 # rmse error, 0.6 for nyu 25 | 26 | -------------------------------------------------------------------------------- /conf/task/train_ldm_mixed.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: ldm_sf 5 | resume_pretrained: 6 | ldm: true 7 | depth_channels: 4 8 | divis_by: 8 9 | prediction_space: disp 10 | camera_resolution: 480x270 # W,H 11 | image_size: [180,320] # H,W 12 | train_dataset: [SceneFlow] 13 | eval_dataset: [SceneFlow] 14 | train_batch_size: 16 15 | gradient_accumulation_steps: 1 16 | eval_num_batch: -1 17 | eval_batch_size: 4 18 | lr_warmup_steps: 0 19 | learning_rate: 3e-5 20 | lr_scheduler: constant # linear: almost the same as constant 21 | val_every_global_steps: 1000 22 | save_model_epochs: 3 23 | num_train_timesteps: 1000 24 | num_inference_timesteps: 10 25 | num_intermediate_images: 5 26 | num_inference_rounds: 1 27 | ssi: false 28 | normalize_mode: average 29 | num_chs: 1 30 | ch_bounds: [128.] 31 | ch_gammas: [1.] 32 | noise_strategy: randn 33 | loss_type: mse 34 | prediction_type: v_prediction 35 | sampler: ddpm 36 | num_epochs: 200 37 | cond_channels: left+right+raw 38 | beta_schedule: scaled_linear 39 | beta_start: 0.00085 40 | beta_end: 0.012 41 | mixed_precision: "no" 42 | thresholding: false 43 | clip_sample: false 44 | block_out_channels: [0] # N/A 45 | -------------------------------------------------------------------------------- /conf/task/train_ldm_mono.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: ldm_mono 5 | resume_pretrained: 6 | ldm: true 7 | depth_channels: 4 8 | divis_by: 8 9 | prediction_space: depth 10 | camera_resolution: 320x240 # WxH 11 | image_size: [240,320] # H,W 12 | train_dataset: [SynTODD] 13 | eval_dataset: [SynTODD] 14 | dataset_weight: [1] 15 | train_batch_size: 12 16 | gradient_accumulation_steps: 1 17 | eval_num_batch: -1 18 | eval_batch_size: 4 19 | lr_warmup_steps: 5000 20 | learning_rate: 3e-5 21 | lr_scheduler: constant # linear: almost the same as constant 22 | val_every_global_steps: 1000 23 | save_model_epochs: 3 24 | num_train_timesteps: 1000 25 | num_inference_timesteps: 10 26 | num_intermediate_images: 5 27 | num_inference_rounds: 1 28 | ssi: true 29 | normalize_mode: average 30 | num_chs: 1 31 | ch_bounds: [1.] 32 | ch_gammas: [1.] 
33 | noise_strategy: randn 34 | loss_type: mse 35 | prediction_type: v_prediction 36 | sampler: ddpm 37 | num_epochs: 200 38 | cond_channels: rgb 39 | beta_schedule: scaled_linear 40 | beta_start: 0.00085 41 | beta_end: 0.012 42 | mixed_precision: "no" 43 | thresholding: false 44 | clip_sample: false 45 | block_out_channels: [0] # N/A 46 | -------------------------------------------------------------------------------- /conf/task/train_sceneflow.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: sceneflow 5 | ldm: false 6 | prediction_space: disp 7 | ssi: false 8 | normalize_mode: average 9 | ch_bounds: [128] 10 | ch_gammas: [1.0] 11 | resume_pretrained: 12 | camera_resolution: 480x270 #960x540 # W,H 13 | image_size: [270, 480] # H,W 14 | train_dataset: [SceneFlow] 15 | eval_dataset: [SceneFlow] 16 | train_batch_size: 4 17 | eval_num_batch: -1 18 | eval_batch_size: 8 19 | lr_warmup_steps: 1000 20 | learning_rate: 1e-4 21 | lr_scheduler: linear 22 | gradient_accumulation_steps: 1 23 | val_every_global_steps: 2000 24 | save_model_epochs: 5 25 | num_train_timesteps: 128 26 | num_inference_timesteps: 10 27 | num_intermediate_images: 5 28 | num_inference_rounds: 1 29 | block_out_channels: [128, 128, 256, 256, 512, 512] 30 | noise_strategy: pyramid 31 | loss_type: l1 32 | prediction_type: sample 33 | num_epochs: 600 34 | cond_channels: left+right+raw 35 | depth_channels: 3 36 | beta_schedule: squaredcos_cap_v2 37 | beta_start: 1e-4 38 | beta_end: 2e-2 39 | sampler: my_ddpm 40 | mixed_precision: "no" 41 | thresholding: true 42 | dynamic_thresholding_ratio: 0.995 43 | clip_sample: true 44 | clip_sample_range: 1.0 -------------------------------------------------------------------------------- /conf/task/train_hiss.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: hiss 5 | ldm: false 6 | prediction_space: disp 7 | resume_pretrained: 8 | cond_channels: left+right+raw 9 | camera_resolution: 224x126 # WxH 10 | image_size: [126, 224] # H,W 11 | ssi: true 12 | safe_ssi: true 13 | train_dataset: [HISS] 14 | eval_dataset: [HISS] 15 | normalize_mode: average 16 | ch_bounds: [64.] 17 | ch_gammas: [1.] 
18 | num_chs: 1 19 | norm_s: 2 20 | norm_t: 0.5 21 | train_batch_size: 32 22 | eval_num_batch: -1 23 | eval_batch_size: 32 24 | lr_warmup_steps: 1000 25 | learning_rate: 0.0001 26 | lr_scheduler: constant 27 | gradient_accumulation_steps: 1 28 | val_every_global_steps: 5000 29 | save_model_epochs: 5 30 | num_train_timesteps: 128 31 | num_inference_timesteps: 8 32 | num_intermediate_images: 4 33 | num_inference_rounds: 1 34 | block_out_channels: [128, 128, 256, 256, 512, 512] 35 | noise_strategy: pyramid 36 | loss_type: mse 37 | prediction_type: sample 38 | num_epochs: 200 39 | depth_channels: 1 40 | beta_schedule: squaredcos_cap_v2 41 | beta_start: 0.0001 42 | beta_end: 0.02 43 | sampler: my_ddpm 44 | mixed_precision: "no" 45 | thresholding: true 46 | dynamic_thresholding_ratio: 0.995 47 | clip_sample: true 48 | clip_sample_range: 1.0 -------------------------------------------------------------------------------- /conf/task/train_ldm_mixed_rgb+raw.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: ldm_sf 5 | resume_pretrained: 6 | ldm: true 7 | depth_channels: 4 8 | divis_by: 8 9 | prediction_space: disp 10 | camera_resolution: 480x270 # W,H 11 | image_size: [180,320] # H,W 12 | train_dataset: [Dreds, HISS, ClearPose] 13 | dataset_weight: [1,1,1] 14 | eval_dataset: [Dreds, HISS, Real_xiaomeng_fxm] 15 | train_batch_size: 16 16 | gradient_accumulation_steps: 1 17 | eval_num_batch: -1 18 | eval_batch_size: 4 19 | lr_warmup_steps: 0 20 | learning_rate: 3e-5 21 | lr_scheduler: constant # linear: almost the same as constant 22 | val_every_global_steps: 1000 23 | save_model_epochs: 3 24 | num_train_timesteps: 1000 25 | num_inference_timesteps: 10 26 | num_intermediate_images: 5 27 | num_inference_rounds: 1 28 | ssi: false 29 | normalize_mode: average 30 | num_chs: 1 31 | ch_bounds: [128.] 32 | ch_gammas: [1.] 33 | noise_strategy: randn 34 | loss_type: mse 35 | prediction_type: v_prediction 36 | sampler: ddpm 37 | num_epochs: 200 38 | cond_channels: rgb+raw 39 | beta_schedule: scaled_linear 40 | beta_start: 0.00085 41 | beta_end: 0.012 42 | mixed_precision: "no" 43 | thresholding: false 44 | clip_sample: false 45 | block_out_channels: [0] # N/A 46 | -------------------------------------------------------------------------------- /conf/task/train_dreds_reprod.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: dreds 5 | ldm: false 6 | prediction_space: disp 7 | resume_pretrained: 8 | cond_channels: left+right+raw 9 | camera_resolution: 224x126 # WxH 10 | image_size: [126, 224] # H,W 11 | ssi: true 12 | safe_ssi: true 13 | train_dataset: [Dreds] 14 | eval_dataset: [Dreds] 15 | normalize_mode: average 16 | ch_bounds: [64.] 17 | ch_gammas: [1.] 
18 | num_chs: 1
19 | norm_s: 2
20 | norm_t: 0.5
21 | train_batch_size: 32
22 | eval_num_batch: -1
23 | eval_batch_size: 32
24 | lr_warmup_steps: 1000
25 | learning_rate: 0.0001
26 | lr_scheduler: constant
27 | gradient_accumulation_steps: 1
28 | val_every_global_steps: 5000
29 | save_model_epochs: 5
30 | num_train_timesteps: 128
31 | num_inference_timesteps: 8
32 | num_intermediate_images: 4
33 | num_inference_rounds: 1
34 | block_out_channels: [128, 128, 256, 256, 512, 512]
35 | noise_strategy: pyramid
36 | loss_type: mse
37 | prediction_type: sample
38 | num_epochs: 200
39 | depth_channels: 1
40 | beta_schedule: squaredcos_cap_v2
41 | beta_start: 0.0001
42 | beta_end: 0.02
43 | sampler: my_ddpm
44 | mixed_precision: "no"
45 | thresholding: true
46 | dynamic_thresholding_ratio: 0.995
47 | clip_sample: true
48 | clip_sample_range: 1.0
--------------------------------------------------------------------------------
/conf/task/train_ldm_mixed_left+right+raw.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - cfg
3 | 
4 | name: ldm_sf
5 | resume_pretrained: 
6 | ldm: true
7 | depth_channels: 4
8 | divis_by: 8
9 | prediction_space: disp
10 | camera_resolution: 480x270 # W,H
11 | image_size: [180,320] # H,W
12 | train_dataset: [SceneFlow, Dreds, HISS]
13 | dataset_weight: [1,1,1]
14 | eval_dataset: [SceneFlow, Dreds, HISS, Real_xiaomeng_fxm]
15 | train_batch_size: 16
16 | gradient_accumulation_steps: 1
17 | eval_num_batch: -1
18 | eval_batch_size: 4
19 | lr_warmup_steps: 0
20 | learning_rate: 3e-5
21 | lr_scheduler: constant # linear: almost the same as constant
22 | val_every_global_steps: 1000
23 | save_model_epochs: 3
24 | num_train_timesteps: 1000
25 | num_inference_timesteps: 10
26 | num_intermediate_images: 5
27 | num_inference_rounds: 1
28 | ssi: false
29 | normalize_mode: average
30 | num_chs: 1
31 | ch_bounds: [128.]
32 | ch_gammas: [1.]
33 | noise_strategy: randn
34 | loss_type: mse
35 | prediction_type: v_prediction
36 | sampler: ddpm
37 | num_epochs: 200
38 | cond_channels: left+right+raw
39 | beta_schedule: scaled_linear
40 | beta_start: 0.00085
41 | beta_end: 0.012
42 | mixed_precision: "no"
43 | thresholding: false
44 | clip_sample: false
45 | block_out_channels: [0] # N/A
46 | 
--------------------------------------------------------------------------------
/conf/task/train_ldm_mixed_cond_rgbd.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - cfg
3 | 
4 | name: ldm_sf
5 | resume_pretrained: 
6 | ldm: true
7 | depth_channels: 4
8 | divis_by: 8
9 | prediction_space: disp
10 | camera_resolution: 320x240 # WxH
11 | image_size: [240,320] # H,W
12 | train_dataset: [ClearPose, Dreds, HISS] # [Dreds] # 
13 | eval_dataset: [ClearPose, Dreds, HISS] # [Dreds] # 
14 | dataset_weight: [1, 1, 1] # [1] # 
15 | train_batch_size: 16
16 | gradient_accumulation_steps: 1
17 | eval_num_batch: -1
18 | eval_batch_size: 4
19 | lr_warmup_steps: 5000
20 | learning_rate: 3e-5
21 | lr_scheduler: constant # linear: almost the same as constant
22 | val_every_global_steps: 1000
23 | save_model_epochs: 3
24 | num_train_timesteps: 1000
25 | num_inference_timesteps: 10
26 | num_intermediate_images: 5
27 | num_inference_rounds: 1
28 | ssi: false
29 | normalize_mode: average
30 | num_chs: 1
31 | ch_bounds: [64.0]
32 | ch_gammas: [1.]
33 | noise_strategy: randn 34 | loss_type: mse 35 | prediction_type: v_prediction 36 | sampler: ddpm 37 | num_epochs: 200 38 | cond_channels: rgb+raw 39 | beta_schedule: scaled_linear 40 | beta_start: 0.00085 41 | beta_end: 0.012 42 | mixed_precision: "no" 43 | thresholding: false 44 | clip_sample: false 45 | block_out_channels: [0] # N/A 46 | -------------------------------------------------------------------------------- /conf/task/train_clearpose.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: clearpose 5 | ldm: false 6 | prediction_space: disp 7 | resume_pretrained: 8 | cond_channels: rgb+raw 9 | camera_resolution: 320x240 # WxH 10 | image_size: [240, 320] # H,W 11 | ssi: true 12 | safe_ssi: false 13 | train_dataset: [ClearPose, Dreds, HISS] # [Dreds] # 14 | eval_dataset: [ClearPose, Dreds, HISS] # [Dreds] # 15 | dataset_weight: [1, 1, 1] # [1] # 16 | normalize_mode: average 17 | ch_bounds: [64.] 18 | ch_gammas: [1.] 19 | num_chs: 1 20 | norm_s: 2 21 | norm_t: 0.5 22 | train_batch_size: 12 # 32 works for 224x126 23 | eval_num_batch: -1 24 | eval_batch_size: 32 25 | lr_warmup_steps: 5000 26 | learning_rate: 0.0001 27 | lr_scheduler: constant 28 | gradient_accumulation_steps: 1 29 | val_every_global_steps: 5000 30 | save_model_epochs: 5 31 | num_train_timesteps: 128 32 | num_inference_timesteps: 8 33 | num_intermediate_images: 4 34 | num_inference_rounds: 1 35 | block_out_channels: [128, 128, 256, 256, 512, 512] 36 | noise_strategy: pyramid 37 | loss_type: mse 38 | prediction_type: sample 39 | num_epochs: 200 40 | depth_channels: 1 41 | beta_schedule: squaredcos_cap_v2 42 | beta_start: 0.0001 43 | beta_end: 0.02 44 | sampler: my_ddpm 45 | mixed_precision: "no" 46 | thresholding: true 47 | dynamic_thresholding_ratio: 0.995 48 | clip_sample: true 49 | clip_sample_range: 1.0 -------------------------------------------------------------------------------- /conf/task/train_ldm_mixed_gapartnet.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: ldm_mixed_gapartnet 5 | resume_pretrained: 6 | ldm: true 7 | depth_channels: 4 8 | divis_by: 8 9 | prediction_space: disp 10 | camera_resolution: 320x180 # W,H 11 | # camera_resolution: 480x270 # W,H 12 | image_size: [180,320] # H,W 13 | # image_size: [270,480] # H,W 14 | train_dataset: [SceneFlow, Dreds, HISS, Gapartnet2] 15 | eval_dataset: [SceneFlow, Dreds, HISS, Gapartnet2, Real] 16 | dataset_weight: [1, 1, 1, 1] 17 | train_batch_size: 16 18 | gradient_accumulation_steps: 1 19 | eval_num_batch: 10 20 | eval_batch_size: 4 21 | lr_warmup_steps: 0 22 | learning_rate: 3e-5 23 | lr_scheduler: constant # linear: almost the same as constant 24 | val_every_global_steps: 1000 25 | save_model_epochs: 3 26 | num_train_timesteps: 1000 27 | num_inference_timesteps: 10 28 | num_intermediate_images: 5 29 | num_inference_rounds: 1 30 | ssi: false 31 | normalize_mode: average 32 | num_chs: 1 33 | ch_bounds: [128.] 34 | ch_gammas: [1.] 
35 | noise_strategy: randn 36 | loss_type: mse 37 | prediction_type: v_prediction 38 | sampler: ddpm 39 | num_epochs: 200 40 | cond_channels: left+right+raw 41 | beta_schedule: scaled_linear 42 | beta_start: 0.00085 43 | beta_end: 0.012 44 | mixed_precision: "no" 45 | thresholding: false 46 | clip_sample: false 47 | block_out_channels: [0] # N/A 48 | -------------------------------------------------------------------------------- /conf/task/train_syntodd_rgbd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cfg 3 | 4 | name: syntodd_rgbd 5 | ldm: false 6 | prediction_space: disp 7 | resume_pretrained: 8 | cond_channels: rgb+raw 9 | camera_resolution: 320x240 # WxH 10 | image_size: [240, 320] # H,W 11 | ssi: true 12 | safe_ssi: false 13 | train_dataset: [SynTODDRgbd] # 14 | eval_dataset: [SynTODDRgbd] # 15 | dataset_variant: simdepth # "simdepth", "erodedepth", "dilatedepth" 16 | dataset_weight: [1] # [1] # 17 | normalize_mode: average 18 | ch_bounds: [64.] 19 | ch_gammas: [1.] 20 | num_chs: 1 21 | norm_s: 2 22 | norm_t: 0.5 23 | train_batch_size: 12 # 32 works for 224x126 24 | eval_num_batch: -1 25 | eval_batch_size: 32 26 | lr_warmup_steps: 5000 27 | learning_rate: 0.0001 28 | lr_scheduler: constant 29 | gradient_accumulation_steps: 1 30 | val_every_global_steps: 5000 31 | save_model_epochs: 5 32 | num_train_timesteps: 128 33 | num_inference_timesteps: 8 34 | num_intermediate_images: 4 35 | num_inference_rounds: 1 36 | block_out_channels: [128, 128, 256, 256, 512, 512] 37 | noise_strategy: pyramid 38 | loss_type: mse 39 | prediction_type: sample 40 | num_epochs: 200 41 | depth_channels: 1 42 | beta_schedule: squaredcos_cap_v2 43 | beta_start: 0.0001 44 | beta_end: 0.02 45 | sampler: my_ddpm 46 | mixed_precision: "no" 47 | thresholding: true 48 | dynamic_thresholding_ratio: 0.995 49 | clip_sample: true 50 | clip_sample_range: 1.0 -------------------------------------------------------------------------------- /isaacsim/render.py: -------------------------------------------------------------------------------- 1 | """Generate infrared rendering using replicator 2 | """ 3 | import json 4 | import math 5 | import os 6 | import random 7 | import sys 8 | 9 | import carb 10 | import yaml 11 | from omni.isaac.kit import SimulationApp 12 | 13 | from omegaconf import DictConfig, OmegaConf 14 | from hydra import compose, initialize 15 | import hydra 16 | 17 | # hydra: load config 18 | with initialize(version_base=None, config_path="config", job_name="replicator_ir"): 19 | cfg = compose(config_name="hssd.yaml" , overrides=sys.argv[1:]) 20 | 21 | if cfg["seed"] >= 0: 22 | random.seed(cfg["seed"]) 23 | 24 | # start simulation 25 | _app = SimulationApp(launch_config=cfg['launch_config']) 26 | _Log = _app.app.print_and_log 27 | 28 | from omni.isaac.core import World 29 | from replicator import IRReplicator 30 | 31 | # main program 32 | def run(cfg: DictConfig) -> None: 33 | _Log("start running") 34 | _world = World(set_defaults=True) #**cfg['world'], 35 | _world.set_simulation_dt(**cfg["world"]) 36 | 37 | # start replicator 38 | rep = IRReplicator(_app, _world, cfg) 39 | rep.start() 40 | 41 | _Log("keep GUI running if headless is False") 42 | while _app.is_running() and not cfg['launch_config']['headless']: 43 | _world.step(render=True) 44 | 45 | _app.close() 46 | 47 | if __name__ == "__main__": 48 | run(cfg) 49 | -------------------------------------------------------------------------------- /isaacsim/README.md: 
--------------------------------------------------------------------------------
1 | 
2 | ## Data Generation in Simulation
3 | 
4 | Although we do not plan to release all the sources for generating the `HISS` dataset, I want to share example code for generating IR renderings using [IsaacSim 4.0.0](https://docs.isaacsim.omniverse.nvidia.com/4.0.0/installation/install_container.html).
5 | 
6 | > This code should also work on newer versions of Isaac Sim with very few changes. If you encounter any problems, please feel free to contact me.
7 | 
8 | 
9 | ### 1. Prepare data
10 | 
11 | + Download [HSSD scenes](https://huggingface.co/datasets/hssd/hssd-scenes)
12 | 
13 | Note that the HSSD scenes are very large; you can download just the ones you need.
14 | 
15 | E.g., I set [107734119_175999932](https://huggingface.co/datasets/hssd/hssd-scenes/blob/main/scenes/107734119_175999932.glb) as the default scene in `config/hssd.yaml`.
16 | 
17 | Please first convert it to a USD file using [USD Composer](https://docs.omniverse.nvidia.com/composer/latest/index.html).
18 | 
19 | + Download the object CAD models from DREDS: [link](https://mirrors.pku.edu.cn/dl-release/DREDS_ECCV2022/data/cad_model/)
20 | 
21 | + Download NVIDIA Omniverse [vMaterials_2](https://developer.nvidia.com/vmaterials)
22 | 
23 | 
24 | Put them all in the `data` folder; example folder structure:
25 | 
26 | ```
27 | data
28 | ├── dreds
29 | │   ├── cad_model
30 | │   │   ├── 00000000
31 | │   │   ├── 02691156
32 | │   │   ├── 02876657
33 | │   │   ├── 02880940
34 | │   │   ├── 02942699
35 | │   │   ├── 02946921
36 | │   │   ├── 02954340
37 | │   │   ├── 02958343
38 | │   │   ├── 02992529
39 | │   │   └── 03797390
40 | │   └── output
41 | ├── hssd
42 | │   └── scenes
43 | │       └── 107734119_175999932
44 | └── vMaterials_2
45 |     ├── Carpet
46 | .....
47 | ```
48 | 
49 | ### 2. Start the Isaac Sim 4.0.0 container
50 | 
51 | Change the project directory to yours and start the isaac-sim container:
52 | 
53 | ```
54 | docker run --name isaac-sim --entrypoint bash -it --runtime=nvidia --gpus all -e "ACCEPT_EULA=Y" --rm --network=host \
55 |     -e "PRIVACY_CONSENT=Y" \
56 |     -v ~/workspace/projects/d3roma/isaacsim:/root/d3roma:rw \
57 |     -v ~/docker/isaac-sim/cache/kit:/isaac-sim/kit/cache:rw \
58 |     -v ~/docker/isaac-sim/cache/ov:/root/.cache/ov:rw \
59 |     -v ~/docker/isaac-sim/cache/pip:/root/.cache/pip:rw \
60 |     -v ~/docker/isaac-sim/cache/glcache:/root/.cache/nvidia/GLCache:rw \
61 |     -v ~/docker/isaac-sim/cache/computecache:/root/.nv/ComputeCache:rw \
62 |     -v ~/docker/isaac-sim/logs:/root/.nvidia-omniverse/logs:rw \
63 |     -v ~/docker/isaac-sim/data:/root/.local/share/ov/data:rw \
64 |     -v ~/docker/isaac-sim/documents:/root/Documents:rw \
65 |     nvcr.io/nvidia/isaac-sim:4.0.0
66 | ```
67 | 
68 | ### 3. Install Python packages into isaac-sim
69 | 
70 | ```
71 | /isaac-sim/python.sh -m pip install -r requirements.txt
72 | ```
73 | 
74 | ### 4. Generate IR renderings
75 | ```
76 | cd /root/d3roma
77 | /isaac-sim/python.sh render.py
78 | ```
79 | 
80 | 
81 | 
82 | 
--------------------------------------------------------------------------------
/utils/losess.py:
--------------------------------------------------------------------------------
1 | """
2 | Helpers for various likelihood-based losses. These are ported from the original
3 | Ho et al.
diffusion models codebase: 4 | https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/utils.py 5 | """ 6 | 7 | import numpy as np 8 | import torch as th 9 | 10 | def mse_to_vlb(t, mse, logvar_clipped): 11 | """ t: bs 12 | mse: bs 13 | """ 14 | if t == 0: 15 | return discretized_gaussian_log_likelihood() 16 | else: 17 | return 0.5 * ( 18 | # -1.0 19 | # + logvar2 20 | # - logvar1 21 | # + th.exp(logvar1 - logvar2) 22 | + mse * th.exp(-logvar_clipped[t]) / np.log(2.0) 23 | ) 24 | 25 | def normal_kl(mean1, logvar1, mean2, logvar2): 26 | """ 27 | Compute the KL divergence between two gaussians. 28 | 29 | Shapes are automatically broadcasted, so batches can be compared to 30 | scalars, among other use cases. 31 | """ 32 | tensor = None 33 | for obj in (mean1, logvar1, mean2, logvar2): 34 | if isinstance(obj, th.Tensor): 35 | tensor = obj 36 | break 37 | assert tensor is not None, "at least one argument must be a Tensor" 38 | 39 | # Force variances to be Tensors. Broadcasting helps convert scalars to 40 | # Tensors, but it does not work for th.exp(). 41 | logvar1, logvar2 = [ 42 | x if isinstance(x, th.Tensor) else th.tensor(x).to(tensor) 43 | for x in (logvar1, logvar2) 44 | ] 45 | 46 | return 0.5 * ( 47 | -1.0 48 | + logvar2 49 | - logvar1 50 | + th.exp(logvar1 - logvar2) 51 | + ((mean1 - mean2) ** 2) * th.exp(-logvar2) 52 | ) 53 | 54 | 55 | def approx_standard_normal_cdf(x): 56 | """ 57 | A fast approximation of the cumulative distribution function of the 58 | standard normal. 59 | """ 60 | return 0.5 * (1.0 + th.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * th.pow(x, 3)))) 61 | 62 | 63 | def discretized_gaussian_log_likelihood(x, *, means, log_scales): 64 | """ 65 | Compute the log-likelihood of a Gaussian distribution discretizing to a 66 | given image. 67 | 68 | :param x: the target images. It is assumed that this was uint8 values, 69 | rescaled to the range [-1, 1]. 70 | :param means: the Gaussian mean Tensor. 71 | :param log_scales: the Gaussian log stddev Tensor. 72 | :return: a tensor like x of log probabilities (in nats). 
73 | """ 74 | assert x.shape == means.shape == log_scales.shape 75 | centered_x = x - means 76 | inv_stdv = th.exp(-log_scales) 77 | plus_in = inv_stdv * (centered_x + 1.0 / 255.0) 78 | cdf_plus = approx_standard_normal_cdf(plus_in) 79 | min_in = inv_stdv * (centered_x - 1.0 / 255.0) 80 | cdf_min = approx_standard_normal_cdf(min_in) 81 | log_cdf_plus = th.log(cdf_plus.clamp(min=1e-12)) 82 | log_one_minus_cdf_min = th.log((1.0 - cdf_min).clamp(min=1e-12)) 83 | cdf_delta = cdf_plus - cdf_min 84 | log_probs = th.where( 85 | x < -0.999, 86 | log_cdf_plus, 87 | th.where(x > 0.999, log_one_minus_cdf_min, th.log(cdf_delta.clamp(min=1e-12))), 88 | ) 89 | assert log_probs.shape == x.shape 90 | return log_probs 91 | -------------------------------------------------------------------------------- /scripts/check_sceneflow.py: -------------------------------------------------------------------------------- 1 | import hydra 2 | from omegaconf import DictConfig, OmegaConf 3 | from hydra.core.config_store import ConfigStore 4 | from config import Config, TrainingConfig, setup_hydra_configurations 5 | from data.data_loader import fetch_dataloader 6 | from utils.utils import seed_everything 7 | from accelerate import Accelerator 8 | from accelerate.logging import get_logger 9 | from tqdm import tqdm 10 | from utils.utils import Normalizer 11 | from utils.frame_utils import read_gen 12 | import torch.nn.functional as F 13 | import shutil 14 | 15 | import torch 16 | import numpy as np 17 | from PIL import Image 18 | 19 | import os 20 | logger = get_logger(__name__, log_level="INFO") # multi-process logging 21 | 22 | Accelerator() # hack: enable logging 23 | 24 | @hydra.main(version_base=None, config_path="conf", config_name="config.yaml") 25 | def check(config: Config): 26 | cfg = config.task 27 | logger.info(cfg.train_dataset) 28 | 29 | train_dataloader, val_dataloader_lst = fetch_dataloader(cfg) 30 | logger.info(val_dataloader_lst[0].dataset.__class__.__name__) 31 | 32 | all_dataloaders = [train_dataloader] 33 | all_dataloaders.extend(val_dataloader_lst) 34 | 35 | count = 0 36 | bads = {} 37 | 38 | for i, dataloader in enumerate([train_dataloader]): # all_dataloaders, val_dataloader_lst 39 | pbar = tqdm(total=len(dataloader)) 40 | for j, data in enumerate(dataloader): 41 | # print(data.keys()) 42 | B = data['mask'].shape[0] 43 | for b in range(B): 44 | # rgb = data['normalized_rgb'][b] 45 | index = data['index'][b] 46 | path = data['path'][b] 47 | 48 | raw_left = path.replace("disparity", "raw_cleanpass").replace("pfm", "png").replace("right", "left") 49 | # raw_right= path.replace("disparity", "raw_finalpass").replace("pfm", "png").replace("left", "right") 50 | 51 | raw_left = np.array(read_gen(raw_left)) 52 | gt_left = np.array(read_gen(path)) 53 | 54 | TP = ((raw_left > 0) & (np.abs(gt_left - raw_left) <= 2)).sum() 55 | FP = ((raw_left > 0) & (np.abs(gt_left - raw_left) > 2)).sum() 56 | FN = ((raw_left == 0) & (np.abs(gt_left - raw_left) <= 2)).sum() 57 | precision = TP / (TP + FP) 58 | recall = TP / (TP + FN) # biased 59 | 60 | # raw_right = read_gen(raw_right) 61 | 62 | # if precision < 0.6 and recall < 0.7: 63 | if precision < 0.2: 64 | bads[path] = precision 65 | logger.info(f"bad image {index}: {path}") 66 | 67 | if True: 68 | dump_dir = "./bad_sim" 69 | shutil.copy2(path, f"{dump_dir}/{j}_{b}_disp.pfm") 70 | shutil.copy2(path.replace("disparity", "raw_finalpass").replace("pfm", "png"), f"{dump_dir}/{j}_{b}_raw.png") 71 | shutil.copy2(path.replace("disparity", "raw_cleanpass").replace("pfm", "png"), 
f"{dump_dir}/{j}_{b}_raw_clean.png") 72 | shutil.copy2(path.replace("disparity", "frames_finalpass").replace("pfm", "png"), f"{dump_dir}/{j}_{b}_left.png") 73 | shutil.copy2(path.replace("disparity", "frames_finalpass").replace("pfm", "png").replace("left", "right"), f"{dump_dir}/{j}_{b}_right.png") 74 | 75 | count += 1 76 | 77 | pbar.update(1) 78 | 79 | logger.info(f"how many bad images? {len(bads.items())}") 80 | with open(f'bad_his.txt', 'w') as f: 81 | for path,epe in bads.items(): 82 | f.write(f"{path} {epe}\n") 83 | 84 | if __name__ == "__main__": 85 | seed_everything(0) 86 | setup_hydra_configurations() 87 | check() -------------------------------------------------------------------------------- /isaacsim/config/hssd.yaml: -------------------------------------------------------------------------------- 1 | launch_config: 2 | renderer: PathTracing #RayTracedLighting # 3 | headless: true # false # 4 | 5 | # Controls lightings for rendering images, 6 | # rgb: color image only 7 | # ir: ir depth image only 8 | # rgb+ir: iteratively render rgb and ir images 9 | # na: don't render images with replicators 10 | render_mode: rgb+ir # gt+rgb+ir # rgb+ir # rgb # ir # 11 | 12 | # Controls the simulation mode 13 | # layout_n_capture: init scene and capture images then quit 14 | # load_n_render: TODO load scene and render images 15 | # simulate: normal simulation mode 16 | 17 | sim_mode: load_n_render # layout_n_capture # simulate # 18 | 19 | resume_scene: 20 | 21 | robot: 22 | name: "franka.yml" #"galbot_zero_lefthand.yml" # 23 | init_pose: [-0.2, 0., 0., 1, 0, 0, 0] #[0.0, 0.5, 0.0] # usually look at, , 0.707, 0.0, 0.0, -0.707 24 | 25 | scene: empty #hssd # 26 | layout: part # dreds # graspnet # 27 | 28 | dreds: 29 | cad_model_dir: data/dreds 30 | layout_offset: [0.2, 0.0, 0.0] 31 | 32 | graspnet: 33 | root_path: data/graspnet 34 | layout_offset: [0.5, 0.2, 0.0] 35 | 36 | hssd: 37 | data_dir: data/hssd/scenes 38 | name: "107734119_175999932" 39 | default_prim_path: "/World/scene" 40 | scale: 1 41 | hide_ceilings: true 42 | hide_walls: false 43 | center_offset: [0.0, 0.0, 0.0] # [0.0, 0.0, 0.0] 44 | surface: 45 | category: teatable 46 | prim_path: /World/furniture/node_b914fb6bcc81386bfa1ff7a3eb8412b7ac581ff 47 | stt: false # specular or transparent, translucent surface 48 | 49 | seed: -1 # set to >= 0 to disable domain randomization 50 | rt_subframes: 8 51 | num_frames_per_surface: 3 52 | visualize: false 53 | render_after_quiet: true 54 | shadow: off 55 | 56 | viewport: 57 | record: false 58 | 59 | world: 60 | physics_dt: 0.016666667 # 0.01 # 61 | rendering_dt: 0.016666667 #0.005 # 62 | 63 | depth_sensor: 64 | name: realsense 65 | clipping_range: [0.1, 5] 66 | focal_length: 1.88 67 | # horizontal_aperture: 26.42033 68 | # vertical_aperture: 14.86144 69 | fov: 71.28 70 | resolution: [640, 360] # [1280, 720] # 71 | placement: # baseline = 0.055 72 | rgb_to_left_ir: 0.0 # 0.015 # 73 | rgb_to_right_ir: 0.055 # 0.070 # 74 | rgb_to_projector: 0.0410 # 0.0425 # 75 | projector: 76 | intensity: 5 77 | exposure: -1.0 78 | 79 | replicator: std_obj # graspnet # glass, articulated_obj 80 | domain_randomization: true 81 | 82 | lighting: 83 | light_type: [Sphere] # Rect # Disk # disk_light # 84 | range: #@see https://zh.wikipedia.org/zh-cn/%E7%90%83%E5%BA%A7%E6%A8%99%E7%B3%BB 85 | theta: [30, 90] 86 | phi: [-60, 60] 87 | radius: [1, 2] 88 | 89 | Distant_light: 90 | intensity: 0 91 | 92 | Sphere_light: 93 | radius: [1, 1] #[0.5, 1.0] 94 | height: [2.5, 2.5] #[1.5, 2] 95 | intensity: 96 | "on": [10000, 10000] 
# [7500, 11000] 97 | "off": [500, 500] # [200, 400] 98 | treatAsPoint: true 99 | 100 | Disk_light: 101 | radius: [1,1] # [0.5, 1.0] 102 | height: [1.5,1.5] #[1.5, 2] 103 | intensity: 104 | "on": [10000, 10000] #[6000, 9000] 105 | "off": [200, 400] 106 | 107 | Rect_light: 108 | width: [100, 100] 109 | height: [100, 100] 110 | intensity: 111 | "on": [50000, 50000] 112 | "off": [2000, 2000] 113 | 114 | specular: 115 | reflection_roughness_constant: [0.05, 0.2] # < 0.4 116 | metallic_constant: [0.8, 0.99] # > 0.9 117 | reflection_color: [0.0, 1.0] 118 | 119 | transparent: 120 | roughness_constant: [0.1, 0.1] # 0.05 121 | cutout_opacity: [0.1, 0.2] # [0.6, 0.7] # [0.2, 0.3] # < 0.4 122 | thin_walled: false #true 123 | glass_ior: [1.4, 1.6] # ~3, default: 1.491 124 | frosting_roughness: [0.2, 0.3] # < 0.1, grayscale only 125 | 126 | glass: 127 | base_alpha: [0.0, 1.0] 128 | ior: [1.4, 1.6] 129 | metallic_factor: [0.0, 0.35] 130 | roughness_factor: [0.0, 0.1] 131 | 132 | scope_name: /MyScope 133 | writer: on # off # BasicWriter 134 | writer_config: 135 | output_dir: output_ir 136 | start_sequence_id: -1 # -1 means continue from the existing frames, otherwise start with specified frame id 137 | rgb: true 138 | disparity: true 139 | normals: true # TODO 140 | # disparity: true 141 | # bounding_box_2d_tight: false 142 | semantic_segmentation: true 143 | distance_to_image_plane: true 144 | pointcloud: false 145 | # bounding_box_3d: false 146 | # occlusion: false 147 | clear_previous_semantics: true 148 | 149 | hydra: 150 | run: 151 | dir: _outputs/${hydra.job.name} 152 | job: 153 | chdir: true 154 | 155 | -------------------------------------------------------------------------------- /data/dataset.py: -------------------------------------------------------------------------------- 1 | from torchvision.transforms import RandomResizedCrop, InterpolationMode 2 | import torchvision.transforms.functional as TF 3 | import torch 4 | import functools 5 | 6 | class WarpDataset(torch.utils.data.Dataset): 7 | def __init__(self, image_size, augment): 8 | self.augment = augment 9 | self.rgb_list = [] 10 | self.depth_list = [] 11 | self.lr_list = [] 12 | self.mask_list = [] 13 | 14 | if self.augment is None: 15 | self.augment = dict() 16 | if type(image_size) == int: 17 | self.image_size = (image_size, image_size) # H x W 18 | elif type(image_size) == tuple: 19 | self.image_size = image_size 20 | else: 21 | raise ValueError("image_size must be int or tuple") 22 | return 23 | 24 | def data_aug(self, rgb, depth, mask, img1=None, img2=None, raw_depth=None): 25 | # random crop and resize. 
26 | safe_apply = lambda func, x: func(x) if x is not None else None 27 | if 'resizedcrop' in self.augment.keys(): 28 | param = self.augment['resizedcrop'] 29 | i, j, h, w = RandomResizedCrop.get_params(rgb, scale=param['scale'], ratio=param['ratio']) 30 | resized_crop = lambda i, j, h, w, size, interp, x: TF.resized_crop(x, i, j, h, w, size=size, interpolation=interp) 31 | resized_crop_fn = functools.partial(resized_crop, i,j,h,w,self.image_size, InterpolationMode.NEAREST) 32 | rgb, mask, depth, img1, img2 = map(lambda x: safe_apply(resized_crop_fn, x), [rgb, mask, depth, img1, img2]) 33 | 34 | """ rgb = TF.resized_crop(rgb, i, j, h, w, size=self.image_size, interpolation=InterpolationMode.NEAREST) 35 | mask = TF.resized_crop(mask, i, j, h, w, size=self.image_size, interpolation=InterpolationMode.NEAREST) 36 | depth = TF.resized_crop(depth, i, j, h, w, size=self.image_size, interpolation=InterpolationMode.NEAREST) 37 | if img1 is not None: 38 | img1 = TF.resized_crop(img1, i, j, h, w, size=self.image_size, interpolation=InterpolationMode.NEAREST) 39 | img2 = TF.resized_crop(img2, i, j, h, w, size=self.image_size, interpolation=InterpolationMode.NEAREST) """ 40 | else: # only resize when eval and test 41 | resize = lambda size, interp, x: TF.resize(x, size=size, interpolation=interp) 42 | resize_fn = functools.partial(resize, self.image_size, InterpolationMode.NEAREST) 43 | rgb, mask, depth, img1, img2 = map(lambda x: safe_apply(resize_fn, x), [rgb, mask, depth, img1, img2]) 44 | 45 | # rgb = TF.resize(rgb, size=self.image_size, interpolation=InterpolationMode.NEAREST) 46 | # mask = TF.resize(mask, size=self.image_size, interpolation=InterpolationMode.NEAREST) 47 | # depth = TF.resize(depth, size=self.image_size, interpolation=InterpolationMode.NEAREST) 48 | # if img1 is not None: 49 | # img1 = TF.resize(img1, size=self.image_size, interpolation=InterpolationMode.NEAREST) 50 | # img2 = TF.resize(img2, size=self.image_size, interpolation=InterpolationMode.NEAREST) 51 | 52 | # Random hflip 53 | if 'hflip' in self.augment.keys(): 54 | param = self.augment['hflip'] 55 | if torch.rand(1) < 0.5: #param['prob']: 56 | rgb, mask, depth, img1, img2 = map(lambda x: safe_apply(TF.hflip, x), [rgb, mask, depth, img1, img2]) 57 | """ rgb = TF.hflip(rgb) 58 | mask = TF.hflip(mask) 59 | depth = TF.hflip(depth) 60 | if img1 is not None: 61 | img1 = TF.hflip(img1) 62 | img2 = TF.hflip(img2) """ 63 | 64 | # TODO add color augmentation such as changing the lighting 65 | 66 | if img1 is None: 67 | return rgb, depth, mask 68 | else: 69 | return rgb, depth, mask, img1, img2 70 | 71 | 72 | def normalize_depth(self, depth, mask, low_p=0.00, high_p=1.00): 73 | """ low_p, high_p: low and high percentile to normalize the depth""" 74 | mask = mask.bool() 75 | masked_depth = depth[mask] 76 | low, high = torch.quantile(masked_depth, torch.tensor((low_p, high_p))) 77 | 78 | depth = (depth - low) / (high - low) 79 | depth = (depth - 0.5) * 2 # [0,1] -> [-1, 1] 80 | return depth 81 | 82 | def normalize_rgb(self, rgb): 83 | return (rgb / 255 - 0.5) * 2 # [0,1] -> [-1, 1] 84 | 85 | def __mul__(self, v): 86 | self.rgb_list = v * self.rgb_list 87 | self.depth_list = v * self.depth_list 88 | self.lr_list = v * self.lr_list 89 | self.mask_list = v * self.mask_list 90 | return self 91 | 92 | def __len__(self): 93 | return len(self.rgb_list) 94 | -------------------------------------------------------------------------------- /core/praser.py: -------------------------------------------------------------------------------- 1 | 
import os 2 | from collections import OrderedDict 3 | import json 4 | from pathlib import Path 5 | from datetime import datetime 6 | from functools import partial 7 | import importlib 8 | from types import FunctionType 9 | import shutil 10 | def init_obj(opt, logger, *args, default_file_name='default file', given_module=None, init_type='Network', **modify_kwargs): 11 | """ 12 | finds a function handle with the name given as 'name' in config, 13 | and returns the instance initialized with corresponding args. 14 | """ 15 | if opt is None or len(opt)<1: 16 | logger.info('Option is None when initialize {}'.format(init_type)) 17 | return None 18 | 19 | ''' default format is dict with name key ''' 20 | if isinstance(opt, str): 21 | opt = {'name': opt} 22 | logger.warning('Config is a str, converts to a dict {}'.format(opt)) 23 | 24 | name = opt['name'] 25 | ''' name can be list, indicates the file and class name of function ''' 26 | if isinstance(name, list): 27 | file_name, class_name = name[0], name[1] 28 | else: 29 | file_name, class_name = default_file_name, name 30 | try: 31 | if given_module is not None: 32 | module = given_module 33 | else: 34 | module = importlib.import_module(file_name) 35 | 36 | attr = getattr(module, class_name) 37 | kwargs = opt.get('args', {}) 38 | kwargs.update(modify_kwargs) 39 | ''' import class or function with args ''' 40 | if isinstance(attr, type): 41 | ret = attr(*args, **kwargs) 42 | ret.__name__ = ret.__class__.__name__ 43 | elif isinstance(attr, FunctionType): 44 | ret = partial(attr, *args, **kwargs) 45 | ret.__name__ = attr.__name__ 46 | # ret = attr 47 | logger.info('{} [{:s}() form {:s}] is created.'.format(init_type, class_name, file_name)) 48 | except: 49 | raise NotImplementedError('{} [{:s}() form {:s}] not recognized.'.format(init_type, class_name, file_name)) 50 | return ret 51 | 52 | 53 | def mkdirs(paths): 54 | if isinstance(paths, str): 55 | os.makedirs(paths, exist_ok=True) 56 | else: 57 | for path in paths: 58 | os.makedirs(path, exist_ok=True) 59 | 60 | def get_timestamp(): 61 | return datetime.now().strftime('%y%m%d_%H%M%S') 62 | 63 | 64 | def write_json(content, fname): 65 | fname = Path(fname) 66 | with fname.open('wt') as handle: 67 | json.dump(content, handle, indent=4, sort_keys=False) 68 | 69 | class NoneDict(dict): 70 | def __missing__(self, key): 71 | return None 72 | 73 | def dict_to_nonedict(opt): 74 | """ convert to NoneDict, which return None for missing key. 
""" 75 | if isinstance(opt, dict): 76 | new_opt = dict() 77 | for key, sub_opt in opt.items(): 78 | new_opt[key] = dict_to_nonedict(sub_opt) 79 | return NoneDict(**new_opt) 80 | elif isinstance(opt, list): 81 | return [dict_to_nonedict(sub_opt) for sub_opt in opt] 82 | else: 83 | return opt 84 | 85 | def dict2str(opt, indent_l=1): 86 | """ dict to string for logger """ 87 | msg = '' 88 | for k, v in opt.items(): 89 | if isinstance(v, dict): 90 | msg += ' ' * (indent_l * 2) + k + ':[\n' 91 | msg += dict2str(v, indent_l + 1) 92 | msg += ' ' * (indent_l * 2) + ']\n' 93 | else: 94 | msg += ' ' * (indent_l * 2) + k + ': ' + str(v) + '\n' 95 | return msg 96 | 97 | def parse(args): 98 | json_str = '' 99 | with open(args.config, 'r') as f: 100 | for line in f: 101 | line = line.split('//')[0] + '\n' 102 | json_str += line 103 | opt = json.loads(json_str, object_pairs_hook=OrderedDict) 104 | 105 | ''' replace the config context using args ''' 106 | opt['phase'] = args.phase 107 | if args.gpu_ids is not None: 108 | opt['gpu_ids'] = [int(id) for id in args.gpu_ids.split(',')] 109 | if args.batch is not None: 110 | opt['datasets'][opt['phase']]['dataloader']['args']['batch_size'] = args.batch 111 | 112 | ''' set cuda environment ''' 113 | if len(opt['gpu_ids']) > 1: 114 | opt['distributed'] = True 115 | else: 116 | opt['distributed'] = False 117 | 118 | ''' update name ''' 119 | if args.debug: 120 | opt['name'] = 'debug_{}'.format(opt['name']) 121 | elif opt['finetune_norm']: 122 | opt['name'] = 'finetune_{}'.format(opt['name']) 123 | else: 124 | opt['name'] = '{}_{}'.format(opt['phase'], opt['name']) 125 | 126 | ''' set log directory ''' 127 | experiments_root = os.path.join(opt['path']['base_dir'], '{}_{}'.format(opt['name'], get_timestamp())) 128 | mkdirs(experiments_root) 129 | 130 | ''' save json ''' 131 | write_json(opt, '{}/config.json'.format(experiments_root)) 132 | 133 | ''' change folder relative hierarchy ''' 134 | opt['path']['experiments_root'] = experiments_root 135 | for key, path in opt['path'].items(): 136 | if 'resume' not in key and 'base' not in key and 'root' not in key: 137 | opt['path'][key] = os.path.join(experiments_root, path) 138 | mkdirs(opt['path'][key]) 139 | 140 | ''' debug mode ''' 141 | if 'debug' in opt['name']: 142 | opt['train'].update(opt['debug']) 143 | 144 | ''' code backup ''' 145 | for name in os.listdir('.'): 146 | if name in ['config', 'models', 'core', 'slurm', 'data']: 147 | shutil.copytree(name, os.path.join(opt['path']['code'], name), ignore=shutil.ignore_patterns("*.pyc", "__pycache__")) 148 | if '.py' in name or '.sh' in name: 149 | shutil.copy(name, opt['path']['code']) 150 | return dict_to_nonedict(opt) 151 | 152 | 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /core/resample.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | import numpy as np 4 | import torch as th 5 | import torch.distributed as dist 6 | 7 | 8 | def create_named_schedule_sampler(name, T, *args): 9 | """ 10 | Create a ScheduleSampler from a library of pre-defined samplers. 11 | 12 | :param name: the name of the sampler. 13 | :param diffusion: the diffusion object to sample for. 
14 | """ 15 | if name == "uniform": 16 | return UniformSampler(T) 17 | elif name == "snr": 18 | return SNRSampler(T, *args) 19 | elif name == "loss-second-moment": 20 | return LossSecondMomentResampler(T) 21 | else: 22 | raise NotImplementedError(f"unknown schedule sampler: {name}") 23 | 24 | 25 | class ScheduleSampler(ABC): 26 | 27 | """ 28 | A distribution over timesteps in the diffusion process, intended to reduce 29 | variance of the objective. 30 | 31 | By default, samplers perform unbiased importance sampling, in which the 32 | objective's mean is unchanged. 33 | However, subclasses may override sample() to change how the resampled 34 | terms are reweighted, allowing for actual changes in the objective. 35 | """ 36 | 37 | @abstractmethod 38 | def weights(self): 39 | """ 40 | Get a numpy array of weights, one per diffusion step. 41 | 42 | The weights needn't be normalized, but must be positive. 43 | """ 44 | 45 | def sample(self, batch_size, device): 46 | """ 47 | Importance-sample timesteps for a batch. 48 | 49 | :param batch_size: the number of timesteps. 50 | :param device: the torch device to save to. 51 | :return: a tuple (timesteps, weights): 52 | - timesteps: a tensor of timestep indices. 53 | - weights: a tensor of weights to scale the resulting losses. 54 | """ 55 | w = self.weights() 56 | p = w / np.sum(w) 57 | indices_np = np.random.choice(len(p), size=(batch_size,), p=p) 58 | indices = th.from_numpy(indices_np).long().to(device) 59 | weights_np = 1 / (len(p) * p[indices_np]) 60 | weights = th.from_numpy(weights_np).float().to(device) 61 | return indices, weights 62 | 63 | 64 | class UniformSampler(ScheduleSampler): 65 | def __init__(self, T): 66 | self.T = T 67 | self._weights = np.ones([T]) 68 | 69 | def weights(self): 70 | return self._weights 71 | 72 | class SNRSampler(ScheduleSampler): 73 | def __init__(self, snr): 74 | self._snr = snr 75 | 76 | def weights(self): 77 | return self._snr 78 | 79 | class LossAwareSampler(ScheduleSampler): 80 | def update_with_local_losses(self, local_ts, local_losses): 81 | """ 82 | Update the reweighting using losses from a model. 83 | 84 | Call this method from each rank with a batch of timesteps and the 85 | corresponding losses for each of those timesteps. 86 | This method will perform synchronization to make sure all of the ranks 87 | maintain the exact same reweighting. 88 | 89 | :param local_ts: an integer Tensor of timesteps. 90 | :param local_losses: a 1D Tensor of losses. 91 | """ 92 | batch_sizes = [ 93 | th.tensor([0], dtype=th.int32, device=local_ts.device) 94 | for _ in range(dist.get_world_size()) 95 | ] 96 | dist.all_gather( 97 | batch_sizes, 98 | th.tensor([len(local_ts)], dtype=th.int32, device=local_ts.device), 99 | ) 100 | 101 | # Pad all_gather batches to be the maximum batch size. 102 | batch_sizes = [x.item() for x in batch_sizes] 103 | max_bs = max(batch_sizes) 104 | 105 | timestep_batches = [th.zeros(max_bs).to(local_ts) for bs in batch_sizes] 106 | loss_batches = [th.zeros(max_bs).to(local_losses) for bs in batch_sizes] 107 | dist.all_gather(timestep_batches, local_ts) 108 | dist.all_gather(loss_batches, local_losses) 109 | timesteps = [ 110 | x.item() for y, bs in zip(timestep_batches, batch_sizes) for x in y[:bs] 111 | ] 112 | losses = [x.item() for y, bs in zip(loss_batches, batch_sizes) for x in y[:bs]] 113 | self.update_with_all_losses(timesteps, losses) 114 | 115 | @abstractmethod 116 | def update_with_all_losses(self, ts, losses): 117 | """ 118 | Update the reweighting using losses from a model. 
119 | 120 | Sub-classes should override this method to update the reweighting 121 | using losses from the model. 122 | 123 | This method directly updates the reweighting without synchronizing 124 | between workers. It is called by update_with_local_losses from all 125 | ranks with identical arguments. Thus, it should have deterministic 126 | behavior to maintain state across workers. 127 | 128 | :param ts: a list of int timesteps. 129 | :param losses: a list of float losses, one per timestep. 130 | """ 131 | 132 | 133 | class LossSecondMomentResampler(LossAwareSampler): 134 | def __init__(self, T, history_per_term=10, uniform_prob=0.001): 135 | self.T = T 136 | self.history_per_term = history_per_term 137 | self.uniform_prob = uniform_prob 138 | self._loss_history = np.zeros( 139 | [T, history_per_term], dtype=np.float64 140 | ) 141 | self._loss_counts = np.zeros([T], dtype=np.int32) 142 | 143 | def weights(self): 144 | if not self._warmed_up(): 145 | return np.ones([self.T], dtype=np.float64) 146 | weights = np.sqrt(np.mean(self._loss_history ** 2, axis=-1)) 147 | weights /= np.sum(weights) 148 | weights *= 1 - self.uniform_prob 149 | weights += self.uniform_prob / len(weights) 150 | return weights 151 | 152 | def update_with_all_losses(self, ts, losses): 153 | for t, loss in zip(ts, losses): 154 | if self._loss_counts[t] == self.history_per_term: 155 | # Shift out the oldest loss term. 156 | self._loss_history[t, :-1] = self._loss_history[t, 1:] 157 | self._loss_history[t, -1] = loss 158 | else: 159 | self._loss_history[t, self._loss_counts[t]] = loss 160 | self._loss_counts[t] += 1 161 | 162 | def _warmed_up(self): 163 | return (self._loss_counts == self.history_per_term).all() 164 | -------------------------------------------------------------------------------- /utils/ransac.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | import numpy as np 3 | from numpy.random import default_rng 4 | rng = default_rng() 5 | import torch 6 | import time 7 | from utils.utils import compute_scale_and_shift 8 | 9 | def square_error_loss(y_true, y_pred): 10 | return (y_true - y_pred) ** 2 11 | 12 | def mean_square_error(y_true, y_pred): 13 | return torch.sum(square_error_loss(y_true, y_pred)) / y_true.shape[0] 14 | 15 | def mean_absolute_error(y_true, y_pred): 16 | # return np.abs(y_true - y_pred).mean() 17 | return torch.abs(y_true - y_pred).mean(1) 18 | 19 | def mean_accuracy_inverse(y_true, y_pred): 20 | thresh = torch.maximum(y_true / y_pred, y_pred / y_true) 21 | return 1 / torch.mean((thresh < 1.25).float()) 22 | 23 | 24 | class ScaleShiftEstimator: 25 | def __init__(self): 26 | self.params = (1, 0) # s,t 27 | 28 | def fit(self, X: np.ndarray, Y: np.ndarray): 29 | """ X & Y: Nx1 """ 30 | start = time.time() 31 | self.params = compute_scale_and_shift(X, Y) 32 | end = time.time() 33 | print(f"ssi: {end - start:.5f}") 34 | return self 35 | 36 | def predict(self, X: np.ndarray): 37 | return X * self.params[0] + self.params[1] 38 | 39 | class RANSAC: 40 | def __init__(self, n=0.1, k=100, t=0.05, d=0.5, model=ScaleShiftEstimator(), loss=square_error_loss, metric=mean_accuracy_inverse): 41 | self.n = n # `n`: (percent) Minimum number of data points to estimate parameters 42 | self.k = k # `k`: Maximum iterations allowed 43 | self.t = t # `t`: Threshold value to determine if points are fit well 44 | self.d = d # `d`: (percent)Number of close data points required to assert model fits well 45 | self.model = model # `model`: class implementing 
`fit` and `predict` 46 | self.loss = loss # `loss`: function of `y_true` and `y_pred` that returns a vector 47 | self.metric = metric # `metric`: function of `y_true` and `y_pred` and returns a float 48 | self.best_fit = None 49 | self.best_error = None 50 | 51 | def fit(self, X, Y, mask): 52 | """ X: source 53 | Y: target 54 | """ 55 | assert X.shape == Y.shape == mask.shape 56 | B, HW = X.shape 57 | 58 | X = X.clone() 59 | Y = Y.clone() 60 | mask = mask.clone() 61 | N = int(self.n * HW) 62 | T = self.t 63 | # T = self.t * torch.abs(Y[mask.bool()]).mean() 64 | D = int(self.d * HW) 65 | 66 | assert D < HW and N < HW, "N, D must be less than HW" 67 | 68 | self.best_num_inlier = torch.zeros((B, 1), device=X.device).to(torch.int32) 69 | self.best_mask_inlier = torch.zeros((B, HW), device=X.device).to(torch.bool) 70 | self.best_error = torch.full((B, 1), torch.inf, device=X.device) 71 | self.best_fit = torch.empty((B, 2), device=X.device) 72 | self.best_fit[:,0] = 1.0 # init s=1, t=0 73 | self.best_fit[:,1] = 0.0 74 | 75 | for _ in range(self.k): 76 | ids = torch.randperm(HW, device=X.device).repeat(B, 1) # torch.arange(HW, device=X.device).repeat(B, 1) # 77 | maybe_inliers = ids[:, :N] 78 | maybe_model = compute_scale_and_shift( 79 | torch.gather(X, 1, maybe_inliers), 80 | torch.gather(Y, 1, maybe_inliers), 81 | torch.gather(mask, 1, maybe_inliers)) 82 | 83 | X_ = X * maybe_model[:, 0:1] + maybe_model[:,1:] 84 | threshold = torch.where(self.loss(Y, X_,) < T, 1, 0).to(torch.bool) & mask.bool() 85 | 86 | better_model = compute_scale_and_shift(X, Y, threshold) 87 | X__ = X * better_model[:, 0:1] + better_model[:, 1:] 88 | this_error = self.metric(Y, X__)[...,None] 89 | this_num_inlier = torch.sum(threshold, 1)[...,None] 90 | select = (this_num_inlier > D) & (this_error < self.best_error) 91 | 92 | self.best_num_inlier = torch.where(select, this_num_inlier, self.best_num_inlier) 93 | self.best_mask_inlier = torch.where(select, threshold, self.best_mask_inlier) 94 | self.best_fit = torch.where(select, better_model, self.best_fit) 95 | self.best_error = torch.where(select, this_error, self.best_error) 96 | return self 97 | 98 | def predict(self, X): 99 | return self.best_fit.predict(X) 100 | 101 | class LinearRegressor: 102 | def __init__(self): 103 | self.params = None 104 | 105 | def fit(self, X: np.ndarray, y: np.ndarray): 106 | r, _ = X.shape 107 | X = np.hstack([np.ones((r, 1)), X]) 108 | self.params = np.linalg.inv(X.T @ X) @ X.T @ y 109 | return self 110 | 111 | def predict(self, X: np.ndarray): 112 | r, _ = X.shape 113 | X = np.hstack([np.ones((r, 1)), X]) 114 | return X @ self.params 115 | 116 | 117 | if __name__ == "__main__": 118 | 119 | regressor = RANSAC(model=LinearRegressor(), loss=square_error_loss, metric=mean_square_error) 120 | 121 | X = np.array([-0.848,-0.800,-0.704,-0.632,-0.488,-0.472,-0.368,-0.336,-0.280,-0.200,-0.00800,-0.0840,0.0240,0.100,0.124,0.148,0.232,0.236,0.324,0.356,0.368,0.440,0.512,0.548,0.660,0.640,0.712,0.752,0.776,0.880,0.920,0.944,-0.108,-0.168,-0.720,-0.784,-0.224,-0.604,-0.740,-0.0440,0.388,-0.0200,0.752,0.416,-0.0800,-0.348,0.988,0.776,0.680,0.880,-0.816,-0.424,-0.932,0.272,-0.556,-0.568,-0.600,-0.716,-0.796,-0.880,-0.972,-0.916,0.816,0.892,0.956,0.980,0.988,0.992,0.00400]).reshape(-1,1) 122 | y = 
np.array([-0.917,-0.833,-0.801,-0.665,-0.605,-0.545,-0.509,-0.433,-0.397,-0.281,-0.205,-0.169,-0.0531,-0.0651,0.0349,0.0829,0.0589,0.175,0.179,0.191,0.259,0.287,0.359,0.395,0.483,0.539,0.543,0.603,0.667,0.679,0.751,0.803,-0.265,-0.341,0.111,-0.113,0.547,0.791,0.551,0.347,0.975,0.943,-0.249,-0.769,-0.625,-0.861,-0.749,-0.945,-0.493,0.163,-0.469,0.0669,0.891,0.623,-0.609,-0.677,-0.721,-0.745,-0.885,-0.897,-0.969,-0.949,0.707,0.783,0.859,0.979,0.811,0.891,-0.137]).reshape(-1,1) 123 | 124 | regressor.fit(X, y) 125 | 126 | import matplotlib.pyplot as plt 127 | plt.style.use("seaborn-darkgrid") 128 | fig, ax = plt.subplots(1, 1) 129 | ax.set_box_aspect(1) 130 | 131 | plt.scatter(X, y) 132 | 133 | line = np.linspace(-1, 1, num=100).reshape(-1, 1) 134 | plt.plot(line, regressor.predict(line), c="peru") 135 | # plt.show() 136 | plt.savefig("ransac.png") 137 | plt.close() -------------------------------------------------------------------------------- /datasets/README.md: -------------------------------------------------------------------------------- 1 | link all the datasets here, example folder structures: 2 | 3 | ``` 4 | datasets 5 | ├── clearpose -> /raid/songlin/Data/clearpose 6 | │   ├── clearpose_downsample_100 7 | │   │   ├── downsample.py 8 | │   │   ├── model 9 | │   │   ├── set1 10 | │   │   ├── set2 11 | │   │   ├── set3 12 | │   │   ├── set4 13 | │   │   ├── set5 14 | │   │   ├── set6 15 | │   │   ├── set7 16 | │   │   ├── set8 17 | │   │   └── set9 18 | │   ├── metadata 19 | │   │   ├── set1 20 | │   │   ├── set2 21 | │   │   ├── set3 22 | │   │   ├── set4 23 | │   │   ├── set5 24 | │   │   ├── set6 25 | │   │   ├── set7 26 | │   │   ├── set8 27 | │   │   └── set9 28 | │   ├── model 29 | │   │   ├── 003_cracker_box 30 | │   │   ├── 005_tomato_soup_can 31 | │   │   ├── 006_mustard_bottle 32 | │   │   ├── 007_tuna_fish_can 33 | │   │   ├── 009_gelatin_box 34 | │   │   ├── BBQSauce 35 | │   │   ├── beaker_1 36 | │   │   ├── bottle_1 37 | │   │   ├── bottle_2 38 | │   │   ├── bottle_3 39 | │   │   ├── bottle_4 40 | │   │   ├── bottle_5 41 | │   │   ├── bowl_1 42 | │   │   ├── bowl_2 43 | │   │   ├── bowl_3 44 | │   │   ├── bowl_4 45 | │   │   ├── bowl_5 46 | │   │   ├── bowl_6 47 | │   │   ├── container_1 48 | │   │   ├── container_2 49 | │   │   ├── container_3 50 | │   │   ├── container_4 51 | │   │   ├── container_5 52 | │   │   ├── create_keypoints.py 53 | │   │   ├── dropper_1 54 | │   │   ├── dropper_2 55 | │   │   ├── flask_1 56 | │   │   ├── fork_1 57 | │   │   ├── funnel_1 58 | │   │   ├── graduated_cylinder_1 59 | │   │   ├── graduated_cylinder_2 60 | │   │   ├── knife_1 61 | │   │   ├── knife_2 62 | │   │   ├── Mayo 63 | │   │   ├── mug_1 64 | │   │   ├── mug_2 65 | │   │   ├── OrangeJuice 66 | │   │   ├── pan_1 67 | │   │   ├── pan_2 68 | │   │   ├── pan_3 69 | │   │   ├── pitcher_1 70 | │   │   ├── plate_1 71 | │   │   ├── plate_2 72 | │   │   ├── reagent_bottle_1 73 | │   │   ├── reagent_bottle_2 74 | │   │   ├── round_table 75 | │   │   ├── spoon_1 76 | │   │   ├── spoon_2 77 | │   │   ├── stick_1 78 | │   │   ├── syringe_1 79 | │   │   ├── trans_models.blend 80 | │   │   ├── trans_models_keypoint.blend 81 | │   │   ├── trans_models_keypoint.blend1 82 | │   │   ├── trans_models_keypoint (copy).blend 83 | │   │   ├── trans_models_kp.blend 84 | │   │   ├── water_cup_1 85 | │   │   ├── water_cup_10 86 | │   │   ├── water_cup_11 87 | │   │   ├── water_cup_12 88 | │   │   ├── water_cup_13 89 | │   │   ├── water_cup_14 90 | │   │   ├── water_cup_2 91 | │   │   ├── water_cup_3 92 
| │   │   ├── water_cup_4 93 | │   │   ├── water_cup_5 94 | │   │   ├── water_cup_6 95 | │   │   ├── water_cup_7 96 | │   │   ├── water_cup_8 97 | │   │   ├── water_cup_9 98 | │   │   ├── wine_cup_1 99 | │   │   ├── wine_cup_2 100 | │   │   ├── wine_cup_3 101 | │   │   ├── wine_cup_4 102 | │   │   ├── wine_cup_5 103 | │   │   ├── wine_cup_6 104 | │   │   ├── wine_cup_7 105 | │   │   ├── wine_cup_8 106 | │   │   └── wine_cup_9 107 | │   ├── set1 108 | │   │   ├── scene1 109 | │   │   ├── scene2 110 | │   │   ├── scene3 111 | │   │   ├── scene4 112 | │   │   └── scene5 113 | │   ├── set2 114 | │   │   ├── scene1 115 | │   │   ├── scene3 116 | │   │   ├── scene4 117 | │   │   ├── scene5 118 | │   │   └── scene6 119 | │   ├── set3 120 | │   │   ├── scene1 121 | │   │   ├── scene11 122 | │   │   ├── scene3 123 | │   │   ├── scene4 124 | │   │   └── scene8 125 | │   ├── set4 126 | │   │   ├── scene1 127 | │   │   ├── scene2 128 | │   │   ├── scene3 129 | │   │   ├── scene4 130 | │   │   ├── scene5 131 | │   │   └── scene6 132 | │   ├── set5 133 | │   │   ├── scene1 134 | │   │   ├── scene2 135 | │   │   ├── scene3 136 | │   │   ├── scene4 137 | │   │   ├── scene5 138 | │   │   └── scene6 139 | │   ├── set6 140 | │   │   ├── scene1 141 | │   │   ├── scene2 142 | │   │   ├── scene3 143 | │   │   ├── scene4 144 | │   │   ├── scene5 145 | │   │   └── scene6 146 | │   ├── set7 147 | │   │   ├── scene1 148 | │   │   ├── scene2 149 | │   │   ├── scene3 150 | │   │   ├── scene4 151 | │   │   ├── scene5 152 | │   │   └── scene6 153 | │   ├── set8 154 | │   │   ├── scene1 155 | │   │   ├── scene2 156 | │   │   ├── scene3 157 | │   │   ├── scene4 158 | │   │   ├── scene5 159 | │   │   └── scene6 160 | │   └── set9 161 | │   ├── scene10 162 | │   ├── scene11 163 | │   ├── scene12 164 | │   ├── scene7 165 | │   ├── scene8 166 | │   └── scene9 167 | ├── DREDS 168 | │   ├── test -> /raid/songlin/Data/DREDS_ECCV2022/DREDS-CatKnown/test 169 | │   │   └── shapenet_generate_1216_val_novel 170 | │   ├── test_std_catknown -> /raid/songlin/Data/DREDS_ECCV2022/STD-CatKnown 171 | │   │   ├── test_0 172 | │   │   ├── test_14-1 173 | │   │   ├── test_18-1 174 | │   │   ├── test_19 175 | │   │   ├── test_20-3 176 | │   │   ├── test_3-2 177 | │   │   ├── test_4-2 178 | │   │   ├── test_5-2 179 | │   │   ├── test_6-1 180 | │   │   ├── test_7-1 181 | │   │   ├── test_8 182 | │   │   ├── test_9-2 183 | │   │   ├── train_0-5 184 | │   │   ├── train_10-1 185 | │   │   ├── train_12 186 | │   │   ├── train_1-4 187 | │   │   ├── train_14-1 188 | │   │   ├── train_16-2 189 | │   │   ├── train_17-1 190 | │   │   ├── train_19-1 191 | │   │   ├── train_3 192 | │   │   ├── train_4-1 193 | │   │   ├── train_7-1 194 | │   │   ├── train_8 195 | │   │   └── train_9-3 196 | │   ├── test_std_catnovel -> /raid/songlin/Data/DREDS_ECCV2022/STD-CatNovel 197 | │   │   └── real_data_novel 198 | │   ├── train -> /raid/songlin/Data/DREDS_ECCV2022/DREDS-CatKnown/train 199 | │   │   ├── part0 200 | │   │   ├── part1 201 | │   │   ├── part2 202 | │   │   ├── part3 203 | │   │   └── part4 204 | │   └── val -> /raid/songlin/Data/DREDS_ECCV2022/DREDS-CatKnown/val 205 | │   └── shapenet_generate_1216 206 | ├── HISS 207 | │   ├── train -> /raid/songlin/Data/hssd-isaac-sim-100k 208 | │   │   ├── 102344049 209 | │   │   ├── 102344280 210 | │   │   ├── 103997586_171030666 211 | │   │   ├── 107734119_175999932 212 | │   │   └── bad_his.txt 213 | │   └── val -> /raid/songlin/Data/hssd-isaac-sim-300hq 214 | │   ├── 102344049 215 | │   ├── 102344280 216 | │   ├── 
103997586_171030666 217 | │   ├── 107734119_175999932 218 | │   ├── bad_his.txt 219 | │   └── simulation2 220 | ├── README.md 221 | ├── Real 222 | │   └── xiaomeng 223 | │   ├── 0000_depth.png 224 | │   ├── 0000_ir_l.png 225 | │   ├── 0000_ir_r.png 226 | │   ├── 0000_raw_disparity.png 227 | │   ├── 0000_rgb.png 228 | │   └── intrinsics.txt 229 | └── sceneflow -> /raid/songlin/Data/sceneflow 230 | ├── bad_sceneflow_test.txt 231 | ├── bad_sceneflow_train.txt 232 | ├── Driving 233 | │   ├── disparity 234 | │   ├── frames_cleanpass 235 | │   ├── frames_finalpass 236 | │   ├── raw_cleanpass 237 | │   └── raw_finalpass 238 | ├── FlyingThings3D 239 | │   ├── disparity 240 | │   ├── frames_cleanpass 241 | │   ├── frames_finalpass 242 | │   ├── raw_cleanpass 243 | │   └── raw_finalpass 244 | └── Monkaa 245 | ├── disparity 246 | ├── frames_cleanpass 247 | ├── frames_finalpass 248 | ├── raw_cleanpass 249 | └── raw_finalpass 250 | 251 | 227 directories, 18 files 252 | 253 | ``` 254 | -------------------------------------------------------------------------------- /distributed_evaluate.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import math 3 | import argparse 4 | import torch 5 | import logging 6 | from tqdm import tqdm 7 | 8 | from core.custom_pipelines import GuidedLatentDiffusionPipeline 9 | from accelerate import Accelerator, PartialState 10 | from core.guidance import FlowGuidance 11 | import numpy as np 12 | from utils.utils import seed_everything 13 | from config import TrainingConfig, create_sampler 14 | from diffusers import UNet2DModel, DDIMScheduler 15 | from utils.utils import InputPadder, metrics_to_dict, pretty_json 16 | from accelerate.logging import get_logger 17 | from utils.camera import plot_error_map 18 | from evaluate import eval_batch 19 | from data.stereo_datasets import * 20 | from data.mono_datasets import * 21 | 22 | import hydra 23 | from config import Config, TrainingConfig, create_sampler, setup_hydra_configurations 24 | 25 | logger = get_logger(__name__, log_level="INFO") 26 | 27 | @hydra.main(version_base=None, config_path="conf", config_name="config.yaml") 28 | def run_distributed_eval(base_cfg: Config): 29 | if base_cfg.seed != -1: 30 | seed_everything(base_cfg.seed) # for reproducing 31 | 32 | accelerator = Accelerator() # hack: enable logging 33 | 34 | config = base_cfg.task 35 | assert len(config.eval_dataset) == 1, "only support single dataset for evaluation" 36 | 37 | inputPadder = InputPadder(config.image_size, divis_by=8) 38 | # config.camera # hack init default camera 39 | 40 | patrained_path = f"{config.resume_pretrained}" 41 | if os.path.exists(patrained_path): 42 | logger.info(f"load weights from {patrained_path}") 43 | """ pipeline = GuidedLatentDiffusionPipeline.from_pretrained(patrained_path).to("cuda") 44 | # model = UNet2DConditionModel.from_pretrained(patrained_path) 45 | 46 | from diffusers import DDIMScheduler 47 | ddim = DDIMScheduler.from_config(dict( 48 | beta_schedule = config.beta_schedule, # "scaled_linear", 49 | beta_start = config.beta_start, # 0.00085, 50 | beta_end = config.beta_end, # 0.012, 51 | clip_sample = config.clip_sample, # False, 52 | num_train_timesteps = config.num_train_timesteps, # 1000, 53 | prediction_type = config.prediction_type, # #"v_prediction", 54 | set_alpha_to_one = False, 55 | skip_prk_steps = True, 56 | steps_offset = 1, 57 | trained_betas = None 58 | )) 59 | pipeline.scheduler = ddim """ 60 | 61 | from core.custom_pipelines import 
GuidedDiffusionPipeline, GuidedLatentDiffusionPipeline 62 | clazz_pipeline = GuidedLatentDiffusionPipeline if config.ldm else GuidedDiffusionPipeline 63 | pipeline = clazz_pipeline.from_pretrained(patrained_path).to("cuda") 64 | pipeline.guidance.flow_guidance_mode=config.flow_guidance_mode 65 | 66 | pipeline.scheduler = create_sampler(config, train=False) 67 | else: 68 | raise ValueError(f"patrained path not exists: {patrained_path}") 69 | 70 | if config.eval_output: 71 | eval_output_dir = f"{config.resume_pretrained}/{config.eval_output}" 72 | else: 73 | eval_output_dir = f"{config.resume_pretrained}/dist.{config.eval_dataset[0]}.g.{config.guide_source}.b{config.eval_num_batch}.{config.eval_split}" 74 | 75 | if not os.path.exists(eval_output_dir): 76 | os.makedirs(eval_output_dir, exist_ok=True) 77 | 78 | logger.logger.addHandler(logging.FileHandler(f"{eval_output_dir}/eval.log")) 79 | logger.logger.addHandler(logging.StreamHandler(sys.stdout)) 80 | logger.info(f"eval output dir: {eval_output_dir}") 81 | 82 | from data.data_loader import create_dataset 83 | val_dataset = create_dataset(config, config.eval_dataset[0], split = config.eval_split) 84 | # print(f"eval_batch_size={config.eval_batch_size}"); exit(0) 85 | val_dataloader = torch.utils.data.DataLoader(val_dataset, 86 | batch_size=config.eval_batch_size, 87 | shuffle=True, 88 | pin_memory=False, 89 | drop_last=False) 90 | 91 | """ if type(model.sample_size) == list: 92 | model.sample_size[0] = inputPadder.padded_size[0] 93 | model.sample_size[1] = inputPadder.padded_size[1] """ 94 | 95 | # distributed evaluation 96 | val_dataloader = accelerator.prepare(val_dataloader) 97 | 98 | pbar = tqdm(total=len(val_dataloader), desc="Eval", disable=not accelerator.is_local_main_process, position=0) 99 | disable_bar = not accelerator.is_local_main_process 100 | distributed_state = PartialState() 101 | 102 | w = config.flow_guidance_weights[0] 103 | if accelerator.is_local_main_process: 104 | logger.info(f"guided by {config.guide_source}") 105 | 106 | disp_metrics = [] 107 | depth_metrics = [] 108 | total = 0 109 | for i, batch in enumerate(val_dataloader): 110 | if config.eval_num_batch > 0 and i >= config.eval_num_batch: 111 | break 112 | 113 | normalized_rgbs = batch["normalized_rgb"] 114 | gt_images = batch["normalized_disp"] 115 | raw_disps = batch["raw_disp"] 116 | left_images = batch["left_image"] if "left_image" in batch else None 117 | right_images = batch["right_image"] if "right_image" in batch else None 118 | depth_images = batch["depth"] if "depth" in batch else None 119 | gt_masks = batch["mask"] 120 | fxb = batch["fxb"] 121 | sim_disps = batch["sim_disp"] if "sim_disp" in batch else None 122 | 123 | B = normalized_rgbs.shape[0] 124 | # assert not torch.any(gt_images[gt_masks.to(torch.bool)] == 0.0), "dataset bug" 125 | if config.guide_source is None: 126 | pass 127 | 128 | elif config.guide_source == "raft-stereo": 129 | pass 130 | 131 | elif config.guide_source == "stereo-match": 132 | pass 133 | 134 | elif config.guide_source == "raw-depth": 135 | guidance_image = batch["raw_depth"] # raw 136 | valid = guidance_image > 0 137 | 138 | elif config.guide_source == "gt": 139 | guidance_image = batch["depth"] # gt 140 | valid = guidance_image > 0 141 | else: 142 | raise ValueError(f"Unknown guidance mode: {config.guide_source}") 143 | 144 | if config.guide_source is not None: 145 | pipeline.guidance.prepare(guidance_image, valid, "depth") # disp 146 | pipeline.guidance.flow_guidance_weight = w 147 | 148 | pred_disps, metrics_, 
uncertainties, error, intermediates = eval_batch(config, pipeline, disable_bar, fxb, normalized_rgbs, 149 | raw_disps, gt_masks, left_images, right_images, sim_disps) 150 | metrics = metrics_to_dict(*metrics_) 151 | logger.info(f"metrics(w={w}):{pretty_json(metrics)}") 152 | 153 | disp_err = torch.from_numpy(metrics_[0]).to(distributed_state.device) # to be gathered 154 | depth_err = torch.from_numpy(metrics_[1]).to(distributed_state.device) 155 | 156 | if config.plot_error_map: 157 | fname = lambda name: f"{eval_output_dir}/idx{i}_w{w}_pid{distributed_state.process_index}_{name}" 158 | error_map = plot_error_map(error) 159 | error_map.save(fname("error.png")) 160 | 161 | # gather all batch results 162 | gathered_disp_err = accelerator.gather_for_metrics(disp_err) 163 | gathered_depth_err = accelerator.gather_for_metrics(depth_err) 164 | 165 | disp_metrics.extend(gathered_disp_err) 166 | depth_metrics.extend(gathered_depth_err) 167 | total += gathered_disp_err.shape[0] 168 | 169 | pbar.update(1) 170 | 171 | # whole val set results 172 | gathered_metrics = metrics_to_dict(torch.vstack(disp_metrics).cpu().numpy(), torch.vstack(depth_metrics).cpu().numpy()) 173 | logger.info(f"final metrics:{pretty_json(gathered_metrics)}") 174 | logger.info(f"total evaluated {total} samples, please check if correct") 175 | 176 | if __name__ == "__main__": 177 | setup_hydra_configurations() 178 | run_distributed_eval() -------------------------------------------------------------------------------- /isaacsim/utils_func.py: -------------------------------------------------------------------------------- 1 | import os, re, math 2 | import numpy as np 3 | from typing import Union, Type, List, Tuple 4 | from pxr import Gf, Sdf, Usd, UsdGeom 5 | from omni.isaac.core.utils.prims import get_prim_at_path 6 | import transforms3d 7 | import omni 8 | 9 | def find_next_sequence_id(output_dir): 10 | import glob 11 | import os 12 | files = sorted(glob.glob(os.path.join(output_dir, "*.png")), reverse=True) 13 | if len(files) == 0: 14 | return 0 15 | return int(files[0].split("/")[-1].split("_")[0]) + 1 16 | 17 | def get_visibility_attribute( 18 | stage: Usd.Stage, prim_path: str 19 | ) -> Union[Usd.Attribute, None]: 20 | #Return the visibility attribute of a prim 21 | path = Sdf.Path(prim_path) 22 | prim = stage.GetPrimAtPath(path) 23 | if not prim.IsValid(): 24 | return None 25 | visibility_attribute = prim.GetAttribute("visibility") 26 | return visibility_attribute 27 | 28 | def get_all_child_mesh(parent_prim: Usd.Prim) -> Usd.Prim: 29 | # Iterates only active, loaded, defined, non-abstract children 30 | mesh_prims = [] 31 | for model_prim in parent_prim.GetChildren(): 32 | if "model" in model_prim.GetPath().pathString: 33 | for child_prim in model_prim.GetChildren(): 34 | if child_prim.IsA(UsdGeom.Mesh): 35 | mesh_prims.append(child_prim) 36 | return mesh_prims 37 | 38 | def create_materials(self, stage, num, opacity): 39 | MDL = "OmniPBR.mdl" 40 | # MDL = "OmniGlass.mdl" 41 | mtl_name, _ = os.path.splitext(MDL) 42 | MAT_PATH = "/World/Looks" 43 | materials = [] 44 | for _ in range(num): 45 | prim_path = omni.usd.get_stage_next_free_path(stage, f"{MAT_PATH}/{mtl_name}", False) 46 | mat = self.create_omnipbr_material(mtl_url=MDL, mtl_name=mtl_name, mtl_path=prim_path, cutout_opacity=opacity) 47 | materials.append(mat) 48 | return materials 49 | 50 | def parse_quadrant(q): 51 | """ x+-y+-z+-, in isaac sim hssd coordinate system """ 52 | x_, y_, z_ = q.split(',') 53 | if y_[1:] == '+': 54 | theta = [0, np.pi/2] 55 | elif 
y_[1:] == '-': 56 | theta = [np.pi/2, np.pi] 57 | else: 58 | theta = [0, np.pi] 59 | 60 | if z_[1:] == '+': 61 | phi = [0, np.pi/2] 62 | elif z_[1:] == '-': 63 | phi = [np.pi/2, np.pi] 64 | else: 65 | phi = [0, np.pi] 66 | 67 | return theta, phi 68 | 69 | def grasp_pose_in_robot(target_grasp, graspnet_offset = np.array([0,0,0])): 70 | T_table_grasp = np.eye(4) 71 | T_table_grasp[:3, :3] = transforms3d.quaternions.quat2mat(target_grasp['orientation']) 72 | T_table_grasp[:3, 3] = target_grasp['position'] 73 | 74 | T_world_table = np.eye(4) 75 | # TODO random table rotation around z 76 | T_world_table[:3, 3] = graspnet_offset 77 | 78 | T_grasp_ee = np.array([ 79 | [0, 0, 1, 0], 80 | [0, -1, 0, 0], 81 | [1, 0, 0, 0], 82 | [0, 0, 0, 1] 83 | ]) 84 | 85 | T_robot_world = np.eye(4) # should be always be identity due to curobo limitation 86 | T_ee_hand = np.eye(4) 87 | T_ee_hand[:3, 3] = np.array([0, 0, -0.10]) 88 | 89 | """ T_robot_hand: base_link -> panda_hand """ 90 | T_robot_hand = T_robot_world @ T_world_table @ T_table_grasp @ T_grasp_ee @ T_ee_hand 91 | target_pose = { 92 | 'position' : T_robot_hand[:3, 3], 93 | 'orientation' : transforms3d.quaternions.mat2quat(T_robot_hand[:3, :3]) 94 | } 95 | return target_pose 96 | 97 | def compute_obb(bbox_cache: UsdGeom.BBoxCache, prim_path: str) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: 98 | """Computes the Oriented Bounding Box (OBB) of a prim 99 | 100 | .. note:: 101 | 102 | * The OBB does not guarantee the smallest possible bounding box, it rotates and scales the default AABB. 103 | * The rotation matrix incorporates any scale factors applied to the object. 104 | * The `half_extent` values do not include these scaling effects. 105 | 106 | Args: 107 | bbox_cache (UsdGeom.BBoxCache): USD Bounding Box Cache object to use for computation 108 | prim_path (str): Prim path to compute OBB for 109 | 110 | Returns: 111 | Tuple[np.ndarray, np.ndarray, np.ndarray]: A tuple containing the following OBB information: 112 | - The centroid of the OBB as a NumPy array. 113 | - The axes of the OBB as a 2D NumPy array, where each row represents a different axis. 114 | - The half extent of the OBB as a NumPy array. 115 | 116 | Example: 117 | 118 | .. code-block:: python 119 | 120 | >>> import omni.isaac.core.utils.bounds as bounds_utils 121 | >>> 122 | >>> # 1 stage unit length cube centered at (0.0, 0.0, 0.0) 123 | >>> cache = bounds_utils.create_bbox_cache() 124 | >>> centroid, axes, half_extent = bounds_utils.compute_obb(cache, prim_path="/World/Cube") 125 | >>> centroid 126 | [0. 0. 0.] 127 | >>> axes 128 | [[1. 0. 0.] 129 | [0. 1. 0.] 130 | [0. 0. 1.]] 131 | >>> half_extent 132 | [0.5 0.5 0.5] 133 | >>> 134 | >>> # the same cube rotated 45 degrees around the z-axis 135 | >>> cache = bounds_utils.create_bbox_cache() 136 | >>> centroid, axes, half_extent = bounds_utils.compute_obb(cache, prim_path="/World/Cube") 137 | >>> centroid 138 | [0. 0. 0.] 139 | >>> axes 140 | [[ 0.70710678 0.70710678 0. ] 141 | [-0.70710678 0.70710678 0. ] 142 | [ 0. 0. 1. 
]] 143 | >>> half_extent 144 | [0.5 0.5 0.5] 145 | """ 146 | # Compute the BBox3d for the prim 147 | prim = get_prim_at_path(prim_path) 148 | bound = bbox_cache.ComputeWorldBound(prim) 149 | 150 | # Compute the translated centroid of the world bound 151 | centroid = bound.ComputeCentroid() 152 | 153 | # Compute the axis vectors of the OBB 154 | # NOTE: The rotation matrix incorporates the scale factors applied to the object 155 | rotation_matrix = bound.GetMatrix().ExtractRotationMatrix() 156 | x_axis = rotation_matrix.GetRow(0) 157 | y_axis = rotation_matrix.GetRow(1) 158 | z_axis = rotation_matrix.GetRow(2) 159 | 160 | # Compute the half-lengths of the OBB along each axis 161 | # NOTE the size/extent values do not include any scaling effects 162 | half_extent = bound.GetRange().GetSize() * 0.5 163 | 164 | return np.array([*centroid]), np.array([[*x_axis], [*y_axis], [*z_axis]]), np.array(half_extent) 165 | 166 | def get_obb_corners(centroid: np.ndarray, axes: np.ndarray, half_extent: np.ndarray) -> np.ndarray: 167 | """Computes the corners of the Oriented Bounding Box (OBB) from the given OBB information 168 | 169 | Args: 170 | centroid (np.ndarray): The centroid of the OBB as a NumPy array. 171 | axes (np.ndarray): The axes of the OBB as a 2D NumPy array, where each row represents a different axis. 172 | half_extent (np.ndarray): The half extent of the OBB as a NumPy array. 173 | 174 | Returns: 175 | np.ndarray: NumPy array of shape (8, 3) containing each corner location of the OBB 176 | 177 | :math:`c_0 = (x_{min}, y_{min}, z_{min})` 178 | |br| :math:`c_1 = (x_{min}, y_{min}, z_{max})` 179 | |br| :math:`c_2 = (x_{min}, y_{max}, z_{min})` 180 | |br| :math:`c_3 = (x_{min}, y_{max}, z_{max})` 181 | |br| :math:`c_4 = (x_{max}, y_{min}, z_{min})` 182 | |br| :math:`c_5 = (x_{max}, y_{min}, z_{max})` 183 | |br| :math:`c_6 = (x_{max}, y_{max}, z_{min})` 184 | |br| :math:`c_7 = (x_{max}, y_{max}, z_{max})` 185 | 186 | Example: 187 | 188 | .. 
code-block:: python 189 | 190 | >>> import omni.isaac.core.utils.bounds as bounds_utils 191 | >>> 192 | >>> cache = bounds_utils.create_bbox_cache() 193 | >>> centroid, axes, half_extent = bounds_utils.compute_obb(cache, prim_path="/World/Cube") 194 | >>> bounds_utils.get_obb_corners(centroid, axes, half_extent) 195 | [[-0.5 -0.5 -0.5] 196 | [-0.5 -0.5 0.5] 197 | [-0.5 0.5 -0.5] 198 | [-0.5 0.5 0.5] 199 | [ 0.5 -0.5 -0.5] 200 | [ 0.5 -0.5 0.5] 201 | [ 0.5 0.5 -0.5] 202 | [ 0.5 0.5 0.5]] 203 | """ 204 | corners = [ 205 | centroid - axes[0] * half_extent[0] - axes[1] * half_extent[1] - axes[2] * half_extent[2], 206 | centroid - axes[0] * half_extent[0] - axes[1] * half_extent[1] + axes[2] * half_extent[2], 207 | centroid - axes[0] * half_extent[0] + axes[1] * half_extent[1] - axes[2] * half_extent[2], 208 | centroid - axes[0] * half_extent[0] + axes[1] * half_extent[1] + axes[2] * half_extent[2], 209 | centroid + axes[0] * half_extent[0] - axes[1] * half_extent[1] - axes[2] * half_extent[2], 210 | centroid + axes[0] * half_extent[0] - axes[1] * half_extent[1] + axes[2] * half_extent[2], 211 | centroid + axes[0] * half_extent[0] + axes[1] * half_extent[1] - axes[2] * half_extent[2], 212 | centroid + axes[0] * half_extent[0] + axes[1] * half_extent[1] + axes[2] * half_extent[2], 213 | ] 214 | return np.array(corners) 215 | -------------------------------------------------------------------------------- /scripts/check_stereo.py: -------------------------------------------------------------------------------- 1 | import hydra 2 | from omegaconf import DictConfig, OmegaConf 3 | from hydra.core.config_store import ConfigStore 4 | from config import Config, TrainingConfig, setup_hydra_configurations 5 | from data.data_loader import fetch_dataloader 6 | from utils.utils import seed_everything 7 | from accelerate import Accelerator 8 | from accelerate.logging import get_logger 9 | from tqdm import tqdm 10 | from utils.utils import Normalizer 11 | import torch.nn.functional as F 12 | 13 | import torch 14 | import numpy as np 15 | from PIL import Image 16 | 17 | logger = get_logger(__name__, log_level="INFO") # multi-process logging 18 | 19 | Accelerator() # hack: enable logging 20 | 21 | @hydra.main(version_base=None, config_path="conf", config_name="config.yaml") 22 | def check(config: Config): 23 | cfg = config.task 24 | logger.info(cfg.train_dataset) 25 | 26 | from utils.camera import DepthCamera, Realsense 27 | from functools import partial 28 | from utils import frame_utils 29 | sim_camera = DepthCamera.from_device("sim") 30 | # sim_camera.change_resolution(f"{config.image_size[1]}x{config.image_size[0]}") 31 | sim_camera.change_resolution(cfg.camera_resolution) 32 | disp_reader = partial(frame_utils.readDispReal, sim_camera) 33 | 34 | # sim_disp, sim_valid, min_disp, max_disp = disp_reader("datasets/HssdIsaacStd/train/102344049/kitchentable/1500_simDepthImage.exr") 35 | # sim_disp, sim_valid, min_disp, max_disp = disp_reader("datasets/HssdIsaacStd/train/102344049/kitchentable/1500_simDispImage.png") 36 | # raw_disp, raw_valid, min_disp, max_disp = disp_reader("datasets/HssdIsaacStd/train/102344049/kitchentable/1500_depth.exr") 37 | 38 | # epe = np.abs(sim_disp[sim_valid] - raw_disp[sim_valid]).mean() 39 | # assert epe < 1, f"bad quality sim disp, epe={epe}" 40 | 41 | train_dataloader, val_dataloader_lst = fetch_dataloader(cfg) 42 | logger.info(val_dataloader_lst[0].dataset.__class__.__name__) 43 | 44 | all_dataloaders = [train_dataloader] 45 | all_dataloaders.extend(val_dataloader_lst) 46 | bad 
= [] 47 | 48 | stats = { 49 | 'mean': [], 50 | 'med': [], 51 | 'min': [], 52 | 'max': [], 53 | 'std': [] 54 | } 55 | 56 | stats_norm = { 57 | 'mean': [], 58 | 'med': [], 59 | 'min': [], 60 | 'max': [], 61 | 'std': [] 62 | } 63 | count = 0 64 | 65 | norm = Normalizer.from_config(cfg) 66 | 67 | bads = {} 68 | 69 | for i, dataloader in enumerate(val_dataloader_lst): # all_dataloaders, [train_dataloader] 70 | pbar = tqdm(total=len(dataloader)) 71 | for j, data in enumerate(dataloader): 72 | # print(data.keys()) 73 | B = data['mask'].shape[0] 74 | for b in range(B): 75 | mask = data['mask'][b] 76 | # sim_mask = data['sim_mask'][b] 77 | 78 | disp = data['raw_disp'][b] 79 | disp_norm = data["normalized_disp"][b] 80 | # rgb = data['normalized_rgb'][b] 81 | index = data['index'][b] 82 | path = data['path'][b] 83 | 84 | # sim_disp = data["sim_disp_unnorm"][b] 85 | # sim_valid = data["sim_mask"][b].bool() 86 | 87 | stats['mean'].append(disp.mean().item()) 88 | stats['med'].append(disp.median().item()) 89 | stats['min'].append(disp.min().item()) 90 | stats['max'].append(disp.max().item()) 91 | stats['std'].append(disp.std().item()) 92 | 93 | stats_norm['mean'].append(disp_norm.mean().item()) 94 | stats_norm['med'].append(disp_norm.median().item()) 95 | stats_norm['min'].append(disp_norm.min().item()) 96 | stats_norm['max'].append(disp_norm.max().item()) 97 | stats_norm['std'].append(disp_norm.std().item()) 98 | 99 | # sim_disp, sim_valid, min_disp, max_disp = disp_reader("datasets/HssdIsaacStd/train/102344049/kitchentable/1500_simDepthImage.exr") 100 | # sim_disp, sim_valid, min_disp, max_disp = disp_reader("datasets/HssdIsaacStd/train/102344049/kitchentable/1500_simDispImage.png") 101 | # raw_disp, raw_valid, min_disp, max_disp = disp_reader("datasets/HssdIsaacStd/train/102344049/kitchentable/1500_depth.exr") 102 | 103 | # epe = torch.abs(sim_disp[sim_valid] - disp[sim_valid]).mean() 104 | if True: #&epe > 2.: 105 | # print(f"bad quality sim disp, epe={epe}, {data['path']}") 106 | # bads[data['path'][b]] = epe 107 | 108 | if "normalized_rgb" in data: 109 | rgb = data['normalized_rgb'][b:b+1] 110 | Image.fromarray(((rgb[0]+1) * 127.5).cpu().numpy().astype(np.uint8).transpose(1,2,0)).save(f"{index}_{j}_rgb.png") 111 | 112 | if True: 113 | left = data['left_image'][b:b+1] 114 | Image.fromarray(((left[0]+1) * 127.5).cpu().numpy().astype(np.uint8).transpose(1,2,0)).save(f"{index}_{j}_left.png") 115 | 116 | right = data['right_image'][b:b+1] 117 | Image.fromarray(((right[0]+1) * 127.5).cpu().numpy().astype(np.uint8).transpose(1,2,0)).save(f"{index}_{j}_right.png") 118 | 119 | H, W = disp.shape[-2:] 120 | device = left.device 121 | 122 | xx, yy = torch.meshgrid(torch.arange(W), torch.arange(H), indexing='xy') 123 | xx = xx.unsqueeze(0).repeat(1, 1, 1).to(device) 124 | yy = yy.unsqueeze(0).repeat(1, 1, 1).to(device) 125 | 126 | # raw_disp = data['raw_disp'][b] 127 | xx = (xx - disp) / ((W - 1) / 2.) - 1 128 | yy = yy / ((H - 1) / 2.) 
- 1 129 | grid = torch.stack((xx, yy), dim=-1) 130 | warp_left_image = F.grid_sample(right, grid, align_corners=True, mode="bilinear", padding_mode="border") 131 | warp_left_image[0][mask.repeat(3,1,1)<1.0] = -1 132 | Image.fromarray(((warp_left_image[0]+1) * 127.5).cpu().numpy().astype(np.uint8).transpose(1,2,0)).save(f"{index}_{j}_warped_right.png") 133 | loss = F.l1_loss(left[..., 0:], warp_left_image, reduction='mean') 134 | logger.info(f"raw disp loss: {loss.item()}") 135 | 136 | sim_disp = norm.denormalize(data["sim_disp"])[b] 137 | xx, yy = torch.meshgrid(torch.arange(W), torch.arange(H), indexing='xy') 138 | xx = xx.unsqueeze(0).repeat(B, 1, 1).to(device) 139 | yy = yy.unsqueeze(0).repeat(B, 1, 1).to(device) 140 | xx = (xx - sim_disp) / ((W - 1) / 2.) - 1 141 | yy = yy / ((H - 1) / 2.) - 1 142 | sim_grid = torch.stack((xx, yy), dim=-1) 143 | warp_left_image_sim = F.grid_sample(right, sim_grid, align_corners=True, mode="bilinear", padding_mode="border") 144 | # warp_left_image_sim[0][mask.repeat(3,1,1)<1.0] = -1 for sparse dataset 145 | warp_left_image_sim[0][mask.repeat(3,1,1)<1.0] = -1 146 | Image.fromarray(((warp_left_image_sim[0]+1) * 127.5).cpu().numpy().astype(np.uint8).transpose(1,2,0)).save(f"{index}_{j}_warped_right_sim.png") 147 | loss_sim = F.l1_loss(left[..., 0:], warp_left_image_sim, reduction='mean') 148 | logger.info(f"sim disp loss: {loss_sim.item()}") 149 | 150 | """ if True or mask.sum() / mask.numel() < 0.98: 151 | bad.append(path) 152 | logger.info(f"bad image {index}: {path}") 153 | 154 | if True: 155 | # low, high = torch.quantile(data['depth'][b], torch.tensor((0.02, 0.98))) # gt depth 156 | # d = (data['depth'][b] - low) / (high - low) 157 | # Image.fromarray(mask[0].cpu().numpy().astype(np.uint8)*255).save(f"{index}_mask.png") 158 | # Image.fromarray((d[0].clamp(0,1)*255).cpu().numpy().astype(np.uint8)).save(f"{index}_depth_p.png") 159 | Image.fromarray(((rgb+1) * 127.5).cpu().numpy().astype(np.uint8).transpose(1,2,0)).save(f"{index}_rgb.png") """ 160 | 161 | count += 1 162 | if count % 1000 == 0: 163 | print("stats_raw...") 164 | print(f"tatal={len(stats['mean'])}") 165 | for k, vals in stats.items(): 166 | print(f"{k}: {np.mean(vals)}") 167 | print("stats_norm...") 168 | for k, vals in stats_norm.items(): 169 | print(f"{k}: {np.mean(vals)}") 170 | 171 | # break 172 | # break 173 | pbar.update(1) 174 | 175 | print(f"tatal={len(stats['mean'])}") 176 | print("stats_raw...") 177 | for k, vals in stats.items(): 178 | print(f"{k}: {np.mean(vals)}") 179 | print("stats_norm...") 180 | for k, vals in stats_norm.items(): 181 | print(f"{k}: {np.mean(vals)}") 182 | 183 | # print("stats:", stats) 184 | logger.info(f"how many bad images? 
{len(bads.items())}") 185 | with open(f'bad_his.txt', 'w') as f: 186 | for path,epe in bads.items(): 187 | f.write(f"{path} {epe}\n") 188 | 189 | if __name__ == "__main__": 190 | 191 | seed_everything(0) 192 | setup_hydra_configurations() 193 | check() -------------------------------------------------------------------------------- /data/data_loader.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from .mono_datasets import * 3 | from .stereo_datasets import * 4 | from config import TrainingConfig 5 | from omegaconf import OmegaConf 6 | from torch.utils.data.dataset import ConcatDataset 7 | from utils.camera import Realsense, RGBDCamera 8 | 9 | def create_dataset(config: TrainingConfig, dataset_name, split = "train"): 10 | mono_lst = ['NYUv2', 'ScanNet', 'HyperSim', 'SceneNet', 'ScanNetpp', 'VK2', 'KITTI', "Middlebury", "InStereo2K", "Tartenair", "HRWSI", "SynTODD"] 11 | stereo_lst = ["Dreds", "Middlebury", "SceneFlow", "Real", "HISS", "ClearPose", "SynTODDRgbd", "Gapartnet2"] 12 | image_size = tuple(config.image_size) 13 | 14 | if len(dataset_name.split("_")) > 1: # Real_split_device 15 | dataset_name, split, device = dataset_name.split("_") 16 | 17 | from utils.utils import Normalizer 18 | normalizer = Normalizer.from_config(config) 19 | 20 | if dataset_name in stereo_lst: 21 | cam_res = [int(x) for x in config.camera_resolution.split("x")[::-1]] 22 | 23 | if split == "train": 24 | # dataset = eval(dataset_name)(f"datasets/{dataset_name}", split="train", image_size=config.image_size, augment=config.augment, camera = config.camera) 25 | aug_params = {"crop_size": image_size, 26 | "min_scale": config.augment["min_scale"], 27 | "max_scale": config.augment["max_scale"], 28 | "yjitter": config.augment["yjitter"]} 29 | aug_params["saturation_range"] = tuple(config.augment["saturation_range"]) 30 | aug_params["gamma"] = config.augment["gamma"] 31 | aug_params["do_flip"] = config.augment["hflip"] #config.augment["hflip"]["prob"] > 0.0 32 | # aug_params["camera_resolution"] = cam_res 33 | if dataset_name == 'SceneFlow': # BUG? min disp=0.5, max disp=192.0? 34 | disp_reader = partial(frame_utils.read_sceneflow, cam_res) 35 | clean_dataset = SceneFlow(aug_params=aug_params, root="datasets/sceneflow", dstype='frames_cleanpass', 36 | reader=disp_reader, normalizer=normalizer) 37 | final_dataset = SceneFlow(aug_params=aug_params, root="datasets/sceneflow", dstype='frames_finalpass', 38 | reader=disp_reader, normalizer=normalizer) 39 | dataset = clean_dataset + final_dataset 40 | elif dataset_name == 'HISS': 41 | sim_camera = DepthCamera.from_device("sim") # BUG? max depth=5. 42 | # sim_camera.change_resolution(f"{config.image_size[1]}x{config.image_size[0]}") 43 | sim_camera.change_resolution(config.camera_resolution) 44 | disp_reader = partial(frame_utils.readDispReal, sim_camera) 45 | dataset = HISS(sim_camera, normalizer, image_size, split, config.prediction_space, aug_params, reader=disp_reader) 46 | elif dataset_name == "Dreds": 47 | sim_camera = Realsense.default_sim() # BUG? max depth=2. 48 | # sim_camera.change_resolution(f"{image_size[1]}x{image_size[0]}") 49 | sim_camera.change_resolution(config.camera_resolution) 50 | # assert image_size == (126, 224) 51 | # disp_reader = partial(frame_utils.readDispDreds_exr, sim_camera) 52 | dataset = Dreds(sim_camera, normalizer, image_size, split, config.prediction_space, aug_params) 53 | elif dataset_name == "ClearPose": 54 | camera = RGBDCamera.default_clearpose() # BUG? max depth=5. 
55 | camera.change_resolution(config.camera_resolution) 56 | disp_reader = partial(frame_utils.readDispReal, camera) 57 | dataset = ClearPose(camera, normalizer, image_size, split, config.prediction_space, reader=disp_reader) 58 | elif dataset_name == "SynTODDRgbd": 59 | camera = RGBDCamera.default_syntodd() 60 | camera.change_resolution(config.camera_resolution) 61 | disp_reader = partial(frame_utils.readDispReal, camera) 62 | dataset = SynTODDRgbd(config.dataset_variant, camera, normalizer, image_size, split, config.prediction_space, reader=disp_reader) 63 | elif dataset_name == "Gapartnet2": 64 | sim_camera = Realsense.from_device("sim") 65 | sim_camera.change_resolution(config.camera_resolution) 66 | disp_reader = partial(frame_utils.readDispReal, sim_camera) 67 | dataset = Gapartnet2(sim_camera, normalizer, image_size, split, config.prediction_space, aug_params, reader=disp_reader) 68 | else: 69 | raise NotImplementedError 70 | 71 | else: 72 | if dataset_name == 'SceneFlow': 73 | disp_reader = partial(frame_utils.read_sceneflow, cam_res) 74 | dataset = SceneFlow(root="datasets/sceneflow", dstype='frames_cleanpass', things_test=True, 75 | reader=disp_reader, normalizer=normalizer) 76 | elif dataset_name == "HISS": 77 | sim_camera = Realsense.from_device("sim") 78 | sim_camera.change_resolution(f"{config.image_size[1]}x{config.image_size[0]}") 79 | disp_reader = partial(frame_utils.readDispReal, sim_camera) 80 | dataset = HISS(sim_camera, normalizer, image_size, split, space=config.prediction_space, reader=disp_reader) 81 | elif dataset_name == "Dreds": 82 | sim_camera = Realsense.default_sim() 83 | sim_camera.change_resolution(f"{image_size[1]}x{image_size[0]}") 84 | # assert image_size == (126, 224) # reprod dreds-1.0 85 | # disp_reader = partial(frame_utils.readDispDreds_exr, sim_camera) 86 | dataset = Dreds(sim_camera, normalizer, image_size, split, space=config.prediction_space) 87 | elif dataset_name == "Real": 88 | real_cam = Realsense.default_real("fxm") 89 | real_cam.change_resolution(f"{config.image_size[1]}x{config.image_size[0]}") 90 | dataset = Real(camera=real_cam, normalizer=normalizer, 91 | image_size=image_size, scene=split, space=config.prediction_space) 92 | elif dataset_name == "ClearPose": 93 | camera = RGBDCamera.default_clearpose() 94 | camera.change_resolution(f"{image_size[1]}x{image_size[0]}") 95 | disp_reader = partial(frame_utils.readDispReal, camera) 96 | dataset = ClearPose(camera, normalizer, image_size, split, config.prediction_space, reader=disp_reader) 97 | elif dataset_name == "SynTODDRgbd": 98 | camera = RGBDCamera.default_syntodd() 99 | camera.change_resolution(f"{image_size[1]}x{image_size[0]}") 100 | disp_reader = partial(frame_utils.readDispReal, camera) 101 | dataset = SynTODDRgbd(config.dataset_variant, camera, normalizer, image_size, split, config.prediction_space, reader=disp_reader) 102 | elif dataset_name == "Gapartnet2": 103 | sim_camera = Realsense.from_device("sim") 104 | sim_camera.change_resolution(f"{config.image_size[1]}x{config.image_size[0]}") 105 | disp_reader = partial(frame_utils.readDispReal, sim_camera) 106 | dataset = Gapartnet2(sim_camera, normalizer, image_size, split, space=config.prediction_space, reader=disp_reader) 107 | 108 | else: 109 | raise NotImplementedError 110 | 111 | elif dataset_name in mono_lst: 112 | if split == "train": 113 | dataset= eval(dataset_name)(f"datasets/{dataset_name}", split="train", image_size=image_size, augment=config.augment) 114 | else: 115 | dataset = 
eval(dataset_name)(f"datasets/{dataset_name}", split=split, image_size=image_size, augment=None) 116 | else: 117 | raise NotImplementedError 118 | return dataset 119 | 120 | def fetch_dataloader(config: TrainingConfig): 121 | """ Create the data loader for the corresponding trainign set """ 122 | 123 | """ if not isinstance(config.dataset, List): 124 | dataset_lst = [config.dataset] 125 | else: 126 | dataset_lst = config.dataset 127 | 128 | if not isinstance(config.dataset_weight, List): 129 | weight_lst = [config.dataset_weight] 130 | else: 131 | weight_lst = config.dataset_weight """ 132 | 133 | assert len(config.train_dataset) == len(config.dataset_weight) 134 | 135 | val_loader_lst = [] 136 | train_dataset = None 137 | for i, dataset_name in enumerate(config.train_dataset): 138 | new_dataset = create_dataset(config, dataset_name, split = "train") 139 | 140 | # multiple dataset weights 141 | if type(new_dataset) == ConcatDataset: 142 | # hack: unsupported operand type(s) for *: 'ConcatDataset' and 'int' 143 | for i in range(max(0, int(config.dataset_weight[i])-1)): 144 | new_dataset += new_dataset 145 | else: 146 | new_dataset = new_dataset * config.dataset_weight[i] 147 | 148 | # add train dataset together 149 | train_dataset = new_dataset if train_dataset is None else train_dataset + new_dataset 150 | 151 | for i, dataset_name in enumerate(config.eval_dataset): 152 | # saperately evaluate each dataset 153 | val_dataset = create_dataset(config, dataset_name, split = "val") 154 | val_dataloader = torch.utils.data.DataLoader(val_dataset, 155 | batch_size=config.eval_batch_size, 156 | shuffle=True, 157 | pin_memory=False, 158 | drop_last=False) 159 | val_loader_lst.append(val_dataloader) 160 | 161 | train_dataloader = torch.utils.data.DataLoader(train_dataset, 162 | batch_size=config.train_batch_size, 163 | shuffle=True, 164 | pin_memory=False, 165 | num_workers=int(os.environ.get('SLURM_CPUS_PER_TASK', 6))-2, 166 | drop_last=True) 167 | 168 | logging.info('Training with %d image pairs' % len(train_dataset)) 169 | return train_dataloader, val_loader_lst 170 | 171 | -------------------------------------------------------------------------------- /isaacsim/replicator.py: -------------------------------------------------------------------------------- 1 | 2 | import os, sys 3 | import csv, copy, math 4 | import time, json 5 | import numpy as np 6 | import random 7 | import transforms3d as t3d 8 | # from scipy.spatial.transform import Rotation 9 | from typing import Union, Type, List 10 | from functools import partial 11 | from PIL import Image 12 | 13 | import carb 14 | import omni.replicator.core as rep 15 | import omni.usd 16 | from omni.isaac.kit import SimulationApp 17 | 18 | from omni.isaac.core.utils.nucleus import get_assets_root_path 19 | 20 | from omni.isaac.core.utils.bounds import compute_combined_aabb, create_bbox_cache 21 | from omni.isaac.core import World 22 | from omni.isaac.core.utils.stage import add_reference_to_stage 23 | from omni.replicator.core import Writer, AnnotatorRegistry 24 | from omni.isaac.core.utils.rotations import euler_angles_to_quat, quat_to_euler_angles 25 | from omni.isaac.core.objects import DynamicCuboid 26 | from pxr import Gf, Sdf, Usd, PhysxSchema, UsdGeom, UsdLux, UsdPhysics, UsdShade 27 | 28 | # import offline_generation_utils 29 | from hydra.utils import get_original_cwd, to_absolute_path 30 | from omegaconf import DictConfig 31 | 32 | from custom_writer import ColorWriter, GtWriter, IRWriter 33 | from omni.replicator.core import WriterRegistry 
34 | from replicate import Replicator 35 | 36 | scene_prim_path = "/World/scene" #!! 37 | 38 | class IRReplicator: 39 | def __init__(self, app: SimulationApp, world: World, config:DictConfig) -> None: 40 | self._app = app 41 | self._world = world 42 | self._config = config 43 | self._log = self._app.app.print_and_log 44 | 45 | # Get server path 46 | # self.assets_root_path = get_assets_root_path() 47 | # if self.assets_root_path is None: 48 | # carb.log_error("Could not get nucleus server path, closing application..") 49 | # app.close() 50 | 51 | # load different scene replicator according to configuration 52 | self.replicator = Replicator.factory(world, config) 53 | 54 | # self._light: Usd.Prim = self.setup_lighting() 55 | 56 | self._scene: Usd.Prim = self.load_scene() 57 | # self._world.scene.add_default_ground_plane() 58 | """ self.scene = UsdPhysics.Scene.Define(self._world.stage, Sdf.Path("/physicsScene")) 59 | self.scene.CreateGravityDirectionAttr().Set(Gf.Vec3f(0.0, 0.0, -1.0)) 60 | self.scene.CreateGravityMagnitudeAttr().Set(9.81) 61 | omni.kit.commands.execute( 62 | "AddGroundPlaneCommand", 63 | stage=self._world.stage, 64 | planePath="/groundPlane", 65 | axis="Z", 66 | size=10.000, 67 | position=Gf.Vec3f(0, 0, -0.01), # hack to hide ground mesh 68 | color=Gf.Vec3f(0.5), 69 | ) """ 70 | 71 | # self._mats = self.load_materials() 72 | 73 | # Disable capture on play and async rendering 74 | carb.settings.get_settings().set("/omni/replicator/captureOnPlay", False) 75 | carb.settings.get_settings().set("/omni/replicator/asyncRendering", False) 76 | carb.settings.get_settings().set("/app/asyncRendering", False) 77 | 78 | # https://forums.developer.nvidia.com/t/replicator-images-contain-artifacts-from-other-frames/220837 79 | # carb.settings.get_settings().set("/rtx/ambientOcclusion/enabled", False) 80 | # rep.settings.set_render_rtx_realtime(antialiasing="FXAA") 81 | 82 | # start replicator 83 | if self._config["rt_subframes"] > 1: 84 | rep.settings.carb_settings("/omni/replicator/RTSubframes", self._config["rt_subframes"]) 85 | else: 86 | carb.log_warn("RTSubframes is set to 1, consider increasing it if materials are not loaded on time") 87 | 88 | self.clear_previous_semantics() 89 | 90 | self.output_dir = os.path.join(os.path.dirname(__file__), config["writer_config"]["output_dir"]) 91 | if not os.path.exists(self.output_dir): 92 | os.makedirs(self.output_dir) 93 | 94 | self.replicator.setup_depth_sensor() 95 | 96 | WriterRegistry.register(ColorWriter) 97 | WriterRegistry.register(GtWriter) 98 | WriterRegistry.register(IRWriter) 99 | 100 | self.dr = self.replicator.setup_domain_randomization() 101 | self._log(json.dumps(self.dr)) 102 | 103 | def clear_previous_semantics(self): 104 | return 105 | if self._config["clear_previous_semantics"]: 106 | offline_generation_utils.remove_previous_semantics(self._world.stage) 107 | 108 | 109 | def setup_lighting(self): 110 | # prim_path = "/World/DiskLight" 111 | # diskLight = UsdLux.DiskLight.Define(self._world.stage, Sdf.Path(prim_path)) 112 | # diskLight.CreateIntensityAttr(15000) 113 | 114 | # light = self._world.stage.GetPrimAtPath(prim_path) 115 | # if not light.GetAttribute("xformOp:translate"): 116 | # UsdGeom.Xformable(light).AddTranslateOp() 117 | # return light 118 | pass 119 | 120 | # def setup_projector_lighting(self): 121 | # prim_path = "/World/RectLight" 122 | # rectLight = UsdLux.RectLight.Define(self._world.stage, Sdf.Path(prim_path)) 123 | # rectLight.CreateIntensityAttr(500) 124 | # rectLight.Create 125 | 126 | def 
load_scene(self): 127 | scene_name = self._config["hssd"]["name"] 128 | data_dir = os.path.abspath(self._config.hssd["data_dir"]) 129 | env_url = f"{data_dir}/{scene_name}/{scene_name}.usd" 130 | assert os.path.exists(env_url), f"Scene file {env_url} does not exist" 131 | add_reference_to_stage(usd_path=env_url, prim_path=scene_prim_path) 132 | 133 | hssd_env = self._world.stage.GetPrimAtPath(scene_prim_path) 134 | if not hssd_env.GetAttribute("xformOp:translate"): 135 | UsdGeom.Xformable(hssd_env).AddTranslateOp() 136 | if not hssd_env.GetAttribute("xformOp:rotateXYZ"): 137 | UsdGeom.Xformable(hssd_env).AddRotateXYZOp() 138 | if not hssd_env.GetAttribute("xformOp:scale"): 139 | UsdGeom.Xformable(hssd_env).AddScaleOp() 140 | 141 | hssd_env.GetAttribute("xformOp:rotateXYZ").Set((90, 0, 0)) 142 | scale = self._config["hssd"]["scale"] 143 | hssd_env.GetAttribute("xformOp:scale").Set((scale, scale, scale)) 144 | 145 | if self._config["hssd"]["hide_ceilings"]: 146 | ceiling = hssd_env.GetPrimAtPath(f"{scene_prim_path}/ceilings") 147 | ceiling.GetAttribute("visibility").Set("invisible") 148 | 149 | if self._config["hssd"]["hide_walls"]: # an ugly hack 150 | walls = hssd_env.GetPrimAtPath(f"{scene_prim_path}/walls") 151 | walls.GetAttribute("visibility").Set("invisible") 152 | 153 | return hssd_env 154 | 155 | # deprecated 156 | def load_materials(self): 157 | #https://forums.developer.nvidia.com/t/how-can-i-change-material-of-the-existing-object-in-runtime/161253 158 | # path_mat_glass_clear = assets_root_path + "/NVIDIA/Materials/vMaterials_2/Glass/Glass_Clear.mdl" 159 | path_mat_glass_clear = "omniverse://localhost/NVIDIA/Materials/vMaterials_2/Glass/Glass_Clear.mdl" 160 | # load more 161 | success, result = omni.kit.commands.execute('CreateMdlMaterialPrimCommand', 162 | mtl_url=path_mat_glass_clear, # This can be path to local or remote MDL 163 | mtl_name='Glass_Clear', # sourceAsset:subIdentifier (i.e. the name of the material within the MDL) 164 | mtl_path="/World/Looks/Glass_Clear" # Prim path for the Material to create. 165 | ) 166 | t = UsdShade.Material(self._world.stage.GetPrimAtPath("/World/Looks/Glass_Clear")) 167 | 168 | path_mat_metal_aluminum = "omniverse://localhost/NVIDIA/Materials/vMaterials_2/Metal/Aluminum.mdl" 169 | success, result = omni.kit.commands.execute('CreateMdlMaterialPrimCommand', 170 | mtl_url=path_mat_glass_clear, # This can be path to local or remote MDL 171 | mtl_name='Aluminum', 172 | mtl_path="/World/Looks/Aluminum" # Prim path for the Material to create. 
173 | ) 174 | s = UsdShade.Material(self._world.stage.GetPrimAtPath("/World/Looks/Aluminum")) 175 | 176 | return { 177 | 'transparent': [t], # TODO add more 178 | 'specular': [s] # TODO add more 179 | } 180 | 181 | # deprecated 182 | def create_rep_object(self, surface_center_pos): 183 | test_model = rep.create.from_usd(f"file:///home/songlin/Projects/DREDS/DepthSensorSimulator/cad_model/02691156/1c93b0eb9c313f5d9a6e43b878d5b335_converted/model_obj.usd", 184 | semantics=[("class", "test")]) 185 | 186 | test_ball = rep.create.sphere(name="test_ball", position=surface_center_pos, scale=(0.1, 0.1, 0.1)) 187 | with test_model: 188 | rep.physics.collider() 189 | rep.physics.rigid_body( 190 | # velocity=rep.distribution.uniform((-0,0,-0),(0,0,1)), 191 | # angular_velocity=rep.distribution.uniform((-0,0,-100),(0,0,0)) 192 | ) 193 | 194 | 195 | 196 | def start(self): 197 | # self.debug = 0 198 | # Find the desired surface 199 | # for surface_config in self._config["hssd"]['surfaces']: 200 | # surface = self._config["hssd"]['surface'] 201 | self.replicator.render() 202 | 203 | """ def randomize_texture(self, dred_models): 204 | materials = create_materials(self._world.stage, len(dred_models)) 205 | assets_root_path = get_assets_root_path() 206 | textures = [ 207 | assets_root_path + "/NVIDIA/Materials/vMaterials_2/Ground/textures/aggregate_exposed_diff.jpg", 208 | assets_root_path + "/NVIDIA/Materials/vMaterials_2/Ground/textures/gravel_track_ballast_diff.jpg", 209 | assets_root_path + "/NVIDIA/Materials/vMaterials_2/Ground/textures/gravel_track_ballast_multi_R_rough_G_ao.jpg", 210 | assets_root_path + "/NVIDIA/Materials/vMaterials_2/Ground/textures/rough_gravel_rough.jpg", 211 | ] 212 | 213 | delay=0.2 214 | initial_materials = {} 215 | for i, shape in dred_models.items(): #enumerate(): 216 | cur_mat, _ = UsdShade.MaterialBindingAPI(shape).ComputeBoundMaterial() 217 | initial_materials[shape] = cur_mat 218 | UsdShade.MaterialBindingAPI(shape).Bind(materials[i-1], UsdShade.Tokens.strongerThanDescendants) 219 | 220 | for mat in materials: 221 | shader = UsdShade.Shader(omni.usd.get_shader_from_material(mat, get_prim=True)) 222 | # diffuse_texture = np.random.choice(textures) 223 | # shader.GetInput("diffuse_texture").Set(diffuse_texture) 224 | 225 | # project_uvw = np.random.choice([True, False], p=[0.9, 0.1]) 226 | # shader.GetInput("project_uvw").Set(bool(project_uvw)) 227 | 228 | # texture_scale = np.random.uniform(0.1, 1) 229 | # shader.GetInput("texture_scale").Set((texture_scale, texture_scale)) 230 | 231 | # texture_rotate = np.random.uniform(0, 45) 232 | # shader.GetInput("texture_rotate").Set(texture_rotate) 233 | 234 | shader.GetInput("metallic_constant").Set(1.0) 235 | shader.GetInput("reflection_roughness_constant").Set(0.0) """ 236 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

 2 | # D3RoMa: Disparity Diffusion-based Depth Sensing for Material-Agnostic Robotic Manipulation
 3 | 
 4 | CoRL 2024, Munich, Germany.
 5 | 
 6 | 
 7 | 
 8 | 
 9 | [Paper arXiv](https://arxiv.org/abs/2409.14365)
 10 | 
 11 | [Project Page](https://pku-epic.github.io/D3RoMa/)
 12 | 
 13 | [Open Review](https://openreview.net/forum?id=7E3JAys1xO)
15 |

 16 | 
 17 | This is the official repository of [**D3RoMa: Disparity Diffusion-based Depth Sensing for Material-Agnostic Robotic Manipulation**](https://arxiv.org/abs/2409.14365).
 18 | 
 19 | For more information, please visit our [**project page**](https://pku-epic.github.io/D3RoMa/).
 20 | 
 21 | [Songlin Wei](https://songlin.github.io/),
 22 | [Haoran Geng](https://geng-haoran.github.io/),
 23 | [Jiayi Chen](https://jychen18.github.io/),
 24 | [Congyue Deng](https://cs.stanford.edu/~congyue/),
 25 | [Wenbo Cui](#),
 26 | [Chengyang Zhao](https://chengyzhao.github.io/),
 27 | [Xiaomeng Fang](#),
 28 | [Leonidas Guibas](https://geometry.stanford.edu/member/guibas/), and
 29 | [He Wang](https://hughw19.github.io/)
 30 | 
 31 | 
 32 | 
 33 | ## 💡 Updates (Feb 27, 2025)
 34 | - [x] Released example code for generating IR stereo images with Isaac Sim 4.0.0
 35 | - [x] Released a new model variant (Cond. on RGB+Raw); please check out the updated `inference.py`
 36 | - [x] Released training protocols and datasets
 37 | 
 38 | 
 39 | 
 40 | Our method robustly predicts transparent (bottles) and specular (basin and cups) object depths in tabletop environments and beyond.
 41 | ![teaser](assets/in-the-wild.png)
 42 | 
 43 | 
 44 | 
 45 | ## INSTALLATION
 46 | ```
 47 | conda create --name d3roma python=3.8
 48 | conda activate d3roma
 49 | 
 50 | # install dependencies with pip
 51 | pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
 52 | pip install huggingface_hub==0.24.5
 53 | pip install diffusers opencv-python scikit-image matplotlib transformers datasets accelerate tensorboard imageio open3d kornia
 54 | pip install hydra-core --upgrade
 55 | ```
 56 | 
 57 | 
 58 | ## DOWNLOAD PRE-TRAINED WEIGHTS
 59 | 
 60 | + For model variant Cond. Left+Right+Raw: [Google drive](https://drive.google.com/file/d/12BLB7mKDbLPhW2UuJSmYnwBFokOjDvC9/view?usp=sharing), [百度云](https://pan.baidu.com/s/1u7n4wstGpqwAswp8ZbTNlw?pwd=o9nk)
 61 | + For model variant Cond. RGB+Raw: [Google drive](https://drive.google.com/file/d/1cTAUZ2lXBXe4-peHLUneJ6ufQTqFr6E9/view?usp=drive_link), [百度云](https://pan.baidu.com/s/1zWwdMQ2_6-CViaC2JUGsFA?pwd=bwwb)
 62 | ```
 63 | # Download the pretrained weights from Google Drive
 64 | # Extract them under the project folder
 65 | ```
 66 | 
 67 | ## RUN INFERENCE
 68 | You can run the following script to test our model. We provide two variants: `left+right+raw` for stereo cameras and `rgb+raw` for any RGBD camera:
 69 | ```
 70 | python inference.py
 71 | ```
 72 | This will generate three files under the folder `_outputs.{variant}`:
 73 | 
 74 | `_outputs.{variant}/pred.png`: the pseudo-colored depth map
 75 | 
 76 | `_outputs.{variant}/pred.ply`: the point cloud obtained by back-projecting the predicted depth
 77 | 
 78 | `_outputs.{variant}/raw.ply`: the point cloud obtained by back-projecting the raw camera depth
 79 | 
 80 | 
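For reference, both `.ply` files come from back-projecting a depth map through the camera intrinsics (internally this is done by `viz_cropped_pointcloud` from `utils/utils.py`). Below is a minimal sketch of the same idea, assuming generic pinhole intrinsics `fx, fy, cx, cy`; the function name and parameters are placeholders, not part of the repository's API:

```
import numpy as np

def backproject_depth(depth_m, fx, fy, cx, cy):
    """Back-project an HxW metric depth map into an Nx3 point cloud in the camera frame."""
    h, w = depth_m.shape
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    x = (u - cx) * depth_m / fx
    y = (v - cy) * depth_m / fy
    pts = np.stack([x, y, depth_m], axis=-1).reshape(-1, 3)
    return pts[pts[:, 2] > 0]  # drop pixels without a valid depth
```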
 81 | ## Training
 82 | 
 83 | #### 1. Preparing Datasets
 84 | 
 85 | All datasets are linked under the folder `datasets`.
 86 | 
 87 | + Download [SceneFlow stereo](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html)
 88 | 
 89 | + Download [DREDS](https://github.com/PKU-EPIC/DREDS#dataset)
 90 | 
 91 | + Download [HISS](https://drive.google.com/drive/folders/1BTbiHWIM_zQC85pz-NMVnYBdZvt1oxaV?usp=sharing)
 92 | 
 93 | + Download [Clearpose](https://github.com/opipari/ClearPose)
 94 | 
 95 | Example datasets folder structure:
 96 | 
 97 | ```
 98 | datasets
 99 | ├── clearpose -> /raid/songlin/Data/clearpose
 100 | │   ├── clearpose_downsample_100
 101 | │   │   ├── downsample.py
 102 | │   │   ├── model
 103 | │   │   ├── set1
 104 | │   │   ├── ...
 105 | │   ├── metadata
 106 | │   │   ├── set1
 107 | │   │   ├── ...
 108 | │   ├── model
 109 | │   │   ├── 003_cracker_box
 110 | │   │   ├── ...
 111 | │   ├── set1
 112 | │   │   ├── scene1
 113 | │   │   ├── ...
 114 | │   ├── ...
 115 | ├── DREDS
 116 | │   ├── test -> /raid/songlin/Data/DREDS_ECCV2022/DREDS-CatKnown/test
 117 | │   │   └── shapenet_generate_1216_val_novel
 118 | │   ├── test_std_catknown -> /raid/songlin/Data/DREDS_ECCV2022/STD-CatKnown
 119 | │   │   ├── test_0
 120 | │   │   ├── ...
 121 | │   ├── test_std_catnovel -> /raid/songlin/Data/DREDS_ECCV2022/STD-CatNovel
 122 | │   │   └── real_data_novel
 123 | │   ├── train -> /raid/songlin/Data/DREDS_ECCV2022/DREDS-CatKnown/train
 124 | │   │   ├── part0
 125 | │   │   ├── ...
 126 | │   └── val -> /raid/songlin/Data/DREDS_ECCV2022/DREDS-CatKnown/val
 127 | │   └── shapenet_generate_1216
 128 | ├── HISS
 129 | │   ├── train -> /raid/songlin/Data/hssd-isaac-sim-100k
 130 | │   │   ├── 102344049
 131 | │   │   ├── 102344280
 132 | │   │   ├── 103997586_171030666
 133 | │   │   ├── 107734119_175999932
 134 | │   │   └── bad_his.txt
 135 | │   └── val -> /raid/songlin/Data/hssd-isaac-sim-300hq
 136 | │   ├── 102344049
 137 | │   ├── 102344280
 138 | │   ├── 103997586_171030666
 139 | │   ├── 107734119_175999932
 140 | │   ├── 300hq.tar.gz
 141 | │   ├── bad_his.txt
 142 | │   └── simulation2
 143 | ├── sceneflow -> /raid/songlin/Data/sceneflow
 144 | │ ├── bad_sceneflow_test.txt
 145 | │ ├── bad_sceneflow_train.txt
 146 | │ ├── Driving
 147 | │ │   ├── disparity
 148 | │ │   ├── frames_cleanpass
 149 | │ │   ├── frames_finalpass
 150 | │ │   ├── raw_cleanpass
 151 | │ │   └── raw_finalpass
 152 | │ ├── FlyingThings3D
 153 | │ │   ├── disparity
 154 | │ │   ├── frames_cleanpass
 155 | │ │   ├── frames_finalpass
 156 | │ │   ├── raw_cleanpass
 157 | │ │   └── raw_finalpass
 158 | │ └── Monkaa
 159 | │ ├── disparity
 160 | │ ├── frames_cleanpass
 161 | │ ├── frames_finalpass
 162 | │ ├── raw_cleanpass
 163 | │ └── raw_finalpass
 164 | ├── README.md
 165 | ```
 166 | 
 167 | #### 2. Data Preprocessing - resize, compute raw disparity, and filter bad images
 168 | 
 169 | - We resize the `DREDS` dataset from `1270x720` to `640x360`, and convert raw depth to raw disparity using the resized resolution (see the depth-to-disparity sketch below).
 170 | 
 171 | - If a dataset does not provide **raw disparity**, we pre-compute it by running a stereo matching algorithm:
 172 | ```
 173 | # please make necessary changes to file paths, focal lengths and baselines etc.
 174 | # we adapted this file from DREDS.
 175 | python scripts/stereo_matching.py
 176 | ```
 177 | 
 178 | We also tried using [libSGM](https://github.com/fixstars/libSGM) to precompute disparity maps for SceneFlow.
 179 | The precomputed raw disparities are put under `raw_cleanpass` and `raw_finalpass` with the same sub-folder paths.
 180 | You can also download the [precomputed sceneflow raw disparities here](https://drive.google.com/file/d/1CZQvR-61IQ8o4n4ewNkVO9M3VCIIGHgr/view?usp=sharing).
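The conversion itself is the standard stereo relation `disparity = fx * baseline / depth`, where `fx` is the focal length in pixels at the resized resolution. A minimal sketch is shown below; `fx_orig`, `baseline_m`, and the widths are placeholders, not the actual DREDS/HISS calibration:

```
import numpy as np

def depth_to_raw_disparity(depth_m, fx_orig, baseline_m, orig_w, new_w):
    """Convert a metric depth map to disparity at the resized resolution."""
    fx = fx_orig * new_w / orig_w          # the focal length scales with the horizontal resize
    disp = np.zeros_like(depth_m, dtype=np.float32)
    valid = depth_m > 0                    # leave invalid depth pixels at zero disparity
    disp[valid] = fx * baseline_m / depth_m[valid]
    return disp
```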
 181 | 
 182 | - Sometimes the source stereo images are too challenging for computing raw disparities, so we filter them out during training.
 183 | We run the following scripts to filter out very bad raw disparities and exclude them in the dataloader:
 184 | 
 185 | ```
 186 | python scripts/check_sceneflow.py
 187 | python scripts/check_stereo.py
 188 | ```
 189 | 
 190 | #### 3. Download pre-trained Stable Diffusion
 191 | 
 192 | We use the v2.1 (resolution 768) version of Stable Diffusion.
 193 | 
 194 | Download the [stablediffusion v2.1-768 checkpoints](https://huggingface.co/stabilityai/stable-diffusion-2-1/tree/main) and put them under `checkpoint/stable-diffusion`.
 195 | 
 196 | Example folder structure after downloading (the checkpoint files were downloaded manually):
 197 | ```
 198 | checkpoint
 199 | └── stable-diffusion -> /home/songlin/Projects/diff-stereo/checkpoint/stable-diffusion
 200 | ├── feature_extractor
 201 | │ └── preprocessor_config.json
 202 | ├── model_index.json
 203 | ├── scheduler
 204 | │ └── scheduler_config.json
 205 | ├── text_encoder
 206 | │ ├── config.json
 207 | │ └── model.safetensors
 208 | ├── tokenizer
 209 | │ ├── merges.txt
 210 | │ ├── special_tokens_map.json
 211 | │ ├── tokenizer_config.json
 212 | │ └── vocab.json
 213 | ├── unet
 214 | │ ├── config.json
 215 | │ └── diffusion_pytorch_model.safetensors
 216 | ├── v2-1_768-nonema-pruned.safetensors
 217 | └── vae
 218 | ├── config.json
 219 | └── diffusion_pytorch_model.safetensors
 220 | 
 221 | ```
 222 | 
 223 | #### 4. Train
 224 | 
 225 | ```
 226 | # Because we already downloaded StableDiffusion's pretrained weights
 227 | export HF_HUB_OFFLINE=True
 228 | ```
 229 | 
 230 | We use Hugging Face Accelerate and train on 8 A100-40G GPUs:
 231 | ```
 232 | cd 
 233 | conda activate d3roma
 234 | accelerate config
 235 | ```
 236 | 
 237 | We train the variant `left+right+raw` using the datasets `SceneFlow`, `DREDS`, and `HISS`. This variant is suitable for working with stereo cameras.
 238 | ```
 239 | accelerate launch train.py \
 240 | task=train_ldm_mixed_left+right+raw \
 241 | task.tag=release \
 242 | task.eval_num_batch=10 \
 243 | task.val_every_global_steps=5000
 244 | ```
 245 | 
 246 | We train the variant `rgb+raw` using the datasets `DREDS`, `HISS` and `ClearPose`. This variant is suitable for working with RGBD cameras.
 247 | 
 248 | ```
 249 | accelerate launch train.py \
 250 | task=train_ldm_mixed_rgb+raw \
 251 | task.tag=release \
 252 | task.eval_num_batch=10 \
 253 | task.val_every_global_steps=5000
 254 | ```
 255 | 
 256 | #### 5. Run TensorBoard to monitor the training process
 257 | 
 258 | ```
 259 | tensorboard --logdir experiments --port 20000
 260 | ```
 261 | 
 262 | #### 6. Distributed Evaluation
 263 | 
 264 | If you want to run evaluation in parallel on the test datasets:
 265 | ```
 266 | accelerate launch distributed_evaluate.py task=...
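# for example (hypothetical run directory; pick any eval task from conf/task and point it at your checkpoint):
# accelerate launch distributed_evaluate.py task=eval_ldm_mixed_rgb+raw task.resume_pretrained=experiments/<your_run>/epoch_XXXX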
267 | ``` 268 | 269 | ## Reproducing results in Paper 270 | ``` 271 | accelerate launch train.py task=train_dreds_reprod 272 | 273 | accelerate launch train.py task=train_clearpose 274 | 275 | accelerate launch train.py task=train_syntodd_rgbd 276 | 277 | accelerate launch train.py task=train_sceneflow 278 | ``` 279 | 280 | ## Contact 281 | If you have any questions please contact us: 282 | 283 | Songlin Wei: slwei@stu.pku.edu.cn, Haoran Geng: ghr@berkeley.edu, He Wang: hewang@pku.edu.cn 284 | 285 | ## Citation 286 | ``` 287 | @inproceedings{ 288 | wei2024droma, 289 | title={D3RoMa: Disparity Diffusion-based Depth Sensing for Material-Agnostic Robotic Manipulation}, 290 | author={Songlin Wei and Haoran Geng and Jiayi Chen and Congyue Deng and Cui Wenbo and Chengyang Zhao and Xiaomeng Fang and Leonidas Guibas and He Wang}, 291 | booktitle={8th Annual Conference on Robot Learning}, 292 | year={2024}, 293 | url={https://openreview.net/forum?id=7E3JAys1xO} 294 | } 295 | ``` 296 | 297 | 298 | ## License 299 | 300 | This work and the dataset are licensed under [CC BY-NC 4.0][cc-by-nc]. 301 | 302 | [![CC BY-NC 4.0][cc-by-nc-image]][cc-by-nc] 303 | 304 | [cc-by-nc]: https://creativecommons.org/licenses/by-nc/4.0/ 305 | [cc-by-nc-image]: https://licensebuttons.net/l/by-nc/4.0/88x31.png -------------------------------------------------------------------------------- /isaacsim/custom_writer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import io 3 | import json, math, copy 4 | import numpy as np 5 | # import cv2 6 | import warp as wp 7 | os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" 8 | # import open3d as o3d 9 | from omni.replicator.core import AnnotatorRegistry, BackendDispatch, Writer, BasicWriter, WriterRegistry 10 | 11 | def rgb2gray(rgb): 12 | return np.dot(rgb[...,:3], [0.2989, 0.5870, 0.1140]) 13 | 14 | def colorize_normals(data): 15 | colored_data = ((data * 0.5 + 0.5) * 255).astype(np.uint8) 16 | return colored_data 17 | 18 | class ColorWriter(BasicWriter): 19 | def __init__( 20 | self, 21 | **kwargs 22 | ): 23 | self.version = "0.0.1" 24 | if "semantic_segmentation" in kwargs: 25 | del kwargs["semantic_segmentation"] 26 | if "distance_to_image_plane" in kwargs: 27 | del kwargs["distance_to_image_plane"] 28 | if "pointcloud" in kwargs: 29 | del kwargs["pointcloud"] 30 | 31 | if "disparity" in kwargs: 32 | del kwargs["disparity"] 33 | 34 | interval = kwargs.pop("interval", 1) 35 | ticker = kwargs.pop("ticker", None) 36 | 37 | if "start_sequence_id" in kwargs: # keep it simple here 38 | start_sequence_id = kwargs["start_sequence_id"] 39 | assert start_sequence_id >= 0, "start_sequence_id must be >= 0" 40 | del kwargs["start_sequence_id"] 41 | 42 | super().__init__(**kwargs) 43 | 44 | self._frame_id = 0 45 | self._sequence_id = start_sequence_id 46 | self._start_sequence_id = start_sequence_id 47 | self._interval = interval 48 | self._ticker = ticker 49 | if self._ticker is None: 50 | self._ticker = lambda: self._frame_id 51 | 52 | def write(self, data: dict): 53 | if self._ticker()[0] == "rgb": 54 | for annotator, val in data["annotators"].items(): 55 | if annotator.startswith("rgb"): 56 | file_path = f"{self._output_dir}/{self._sequence_id:04d}_color.png" 57 | self._backend.write_image(file_path, val["RenderProduct_CameraRGB"]["data"]) 58 | # print(f"rendered color {self._sequence_id:04d}") 59 | self._sequence_id += 1 60 | self._frame_id += 1 61 | 62 | def _write_rgb(self, data: dict, render_product_path: str, annotator: str): 63 | 
file_path = f"{render_product_path}rgb_{self._sequence_id}{self._frame_id:0{self._frame_padding}}.{self._image_output_format}" 64 | self._backend.write_image(file_path, data[annotator]) 65 | 66 | def on_final_frame(self): # reset 67 | self._frame_id = 0 68 | self._sequence_id = self._start_sequence_id 69 | 70 | class GtWriter(BasicWriter): 71 | """ not only render depth, but also render semantic / masks / pointcloud, etc. """ 72 | 73 | def __init__(self, interval=1, depth_sensor_cfg=dict(), **kwargs): 74 | self.version = "0.0.1" 75 | ticker = kwargs.pop("ticker") 76 | 77 | conifg = copy.copy(kwargs) 78 | # kwargs = dict(conifg["writer_config"]) 79 | 80 | if "rgb" in kwargs: 81 | del kwargs["rgb"] 82 | 83 | if "disparity" in kwargs: # hack 84 | self.render_disparity = kwargs["disparity"] 85 | self.depth_sensor_cfg = depth_sensor_cfg 86 | self.set_render_disparity() 87 | del kwargs["disparity"] 88 | else: 89 | self.render_disparity = False 90 | # kwargs["pointcloud_include_unlabelled"] = True 91 | 92 | if "start_sequence_id" in kwargs: 93 | start_sequence_id = kwargs["start_sequence_id"] 94 | assert start_sequence_id >= 0, "start_sequence_id must be >= 0" 95 | del kwargs["start_sequence_id"] 96 | 97 | 98 | 99 | super().__init__(**kwargs) 100 | self._frame_id = 0 101 | self._sequence_id = start_sequence_id 102 | self._start_sequence_id = start_sequence_id 103 | self._interval = interval 104 | self._ticker = ticker 105 | self._last_tick = None 106 | 107 | def set_render_disparity(self): 108 | FOV = np.deg2rad(self.depth_sensor_cfg["fov"]) 109 | W = self.depth_sensor_cfg["resolution"][0] 110 | # H = cfg["depth_sensor"]["resolution"][1] 111 | focal = W / (2 * math.tan(FOV / 2)) 112 | # assert np.allclose(focal, 446.31), "do you have the correct focal length?" 
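        # note: `focal` is the horizontal focal length in pixels derived from the configured FOV;
        # multiplied by the IR baseline below it gives `fxb`, and the raw disparity written later is fxb / depth.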
113 | 114 | baseline = self.depth_sensor_cfg["placement"]["rgb_to_right_ir"] - self.depth_sensor_cfg["placement"]["rgb_to_left_ir"] 115 | assert np.isclose(baseline, 0.055), "wrong baseline" 116 | self.fxb = focal * baseline 117 | 118 | def write(self, data: dict): 119 | 120 | def write_exr(path, data, exr_flag=None): 121 | """ fix for isaac-sim 2022.2.1 """ 122 | import imageio 123 | if isinstance(data, wp.array): 124 | data = data.numpy() 125 | 126 | # Download freeimage dll, will only download once if not present 127 | # from https://imageio.readthedocs.io/en/v2.8.0/format_exr-fi.html#exr-fi 128 | imageio.plugins.freeimage.download() 129 | if exr_flag == None: 130 | exr_flag = imageio.plugins.freeimage.IO_FLAGS.EXR_ZIP 131 | 132 | exr_bytes = imageio.imwrite( 133 | imageio.RETURN_BYTES, 134 | data, 135 | format="exr", 136 | flags=exr_flag, 137 | ) 138 | self._backend.write_blob(path, exr_bytes) 139 | 140 | if self._ticker()[0] == "gt": 141 | if self._last_tick is not None and self._ticker()[1] == self._last_tick: 142 | return # hack to avoid duplicate frames (only happens for GT writer on isaac-sim 2023 hotfix) 143 | for annotator, val in data["annotators"].items(): 144 | if annotator.startswith("distance_to_image_plane"): 145 | # file_path = f"{self._output_dir}/{self._sequence_id:04d}_depth.png" 146 | # self._backend.write_image(file_path, (data[annotator]*1000).astype(np.uint16)) 147 | # file_path = f"{self._output_dir}/{self._sequence_id:04d}_depth.npy" 148 | # self._backend.write_array(file_path, data[annotator]) 149 | 150 | file_path_exr = f"{self._output_dir}/{self._sequence_id:04d}_depth.exr" 151 | # self._backend.write_exr(file_path_exr, data[annotator]) 152 | # cv2.imwrite(file_path_exr, data[annotator]) 153 | write_exr(file_path_exr, val["RenderProduct_CameraDepth"]["data"]) 154 | 155 | if self.render_disparity: 156 | assert self.fxb is not None, "please call set_render_disparity() first" 157 | disparity = self.fxb / val["RenderProduct_CameraDepth"]["data"] 158 | # file_path = f"{self._output_dir}/{self._sequence_id:04d}_disp.npy" 159 | file_path_exr = f"{self._output_dir}/{self._sequence_id:04d}_disp.exr" 160 | # self._backend.write_array(file_path, disparity) 161 | # self._backend.write_exr(file_path_exr, disparity) 162 | # cv2.imwrite(file_path_exr, disparity) 163 | write_exr(file_path_exr, disparity) 164 | 165 | if annotator.startswith("semantic_segmentation"): 166 | semantic_seg_data = val["RenderProduct_CameraDepth"]["data"] 167 | height, width = semantic_seg_data.shape[:2] 168 | 169 | file_path = (f"{self._output_dir}/{self._sequence_id:04d}_mask.png") 170 | if self.colorize_semantic_segmentation: 171 | semantic_seg_data = semantic_seg_data.view(np.uint8).reshape(height, width, -1) 172 | self._backend.write_image(file_path, semantic_seg_data) 173 | else: 174 | semantic_seg_data = semantic_seg_data.view(np.uint32).reshape(height, width) 175 | self._backend.write_image(file_path, semantic_seg_data) 176 | 177 | id_to_labels = val["RenderProduct_CameraDepth"]["idToLabels"] 178 | file_path = f"{self._output_dir}/{self._sequence_id:04d}_mask.json" 179 | buf = io.BytesIO() 180 | buf.write(json.dumps({str(k): v for k, v in id_to_labels.items()}).encode()) 181 | self._backend.write_blob(file_path, buf.getvalue()) 182 | 183 | if annotator.startswith("normals"): 184 | normals_data = val["RenderProduct_CameraDepth"]["data"] 185 | file_path_normal = f"{self._output_dir}/{self._sequence_id:04d}_normal.png" 186 | colorized_normals_data = colorize_normals(normals_data) 187 | 
self._backend.write_image(file_path_normal, colorized_normals_data) 188 | 189 | if annotator.startswith("pointcloud"): 190 | pointcloud_data = data[annotator]["data"] 191 | file_path = f"{self._output_dir}/{self._sequence_id:04d}_pcd.npy" 192 | self._backend.write_array(file_path, pointcloud_data) 193 | 194 | pointcloud_rgb = data[annotator]["info"]["pointRgb"].reshape(-1, 4) 195 | rgb_file_path = f"{self._output_dir}/{self._sequence_id:04d}_pcd_rgb.npy" 196 | self._backend.write_array(rgb_file_path, pointcloud_rgb) 197 | 198 | """ pcd = o3d.geometry.PointCloud() 199 | pcd.points = o3d.utility.Vector3dVector(pointcloud_data.astype(np.float32).reshape(-1, 3)) 200 | o3d.io.write_point_cloud(file_path, pcd) """ 201 | self._last_tick = self._ticker()[1] 202 | # print(f"rendered gt {self._sequence_id:04d}") 203 | self._sequence_id += 1 204 | self._frame_id += 1 205 | 206 | def on_final_frame(self): 207 | self._frame_id = 0 208 | self._sequence_id = self._start_sequence_id 209 | 210 | class IRWriter(Writer): 211 | def __init__( 212 | self, 213 | output_dir, 214 | start_sequence_id=0, 215 | interval=1, 216 | ticker=None, 217 | ): 218 | self.version = "0.0.1" 219 | self.backend = BackendDispatch({"paths": {"out_dir": output_dir}}) 220 | self.annotators.append(AnnotatorRegistry.get_annotator("rgb")) 221 | self._output_dir = output_dir 222 | self._interval = interval 223 | 224 | assert start_sequence_id >= 0, "start_sequence_id must be >= 0" 225 | self._frame_id = 0 226 | self._sequence_id = start_sequence_id 227 | self._start_sequence_id = start_sequence_id 228 | self._ticker = ticker 229 | if self._ticker is None: 230 | self._ticker = lambda: self._frame_id 231 | 232 | def write(self, data: dict): 233 | if self._ticker()[0] == "ir": 234 | for annotator in data.keys(): 235 | if annotator.startswith("rgb"): 236 | # ir_name = 'ir_l' if 'Left' in annotator else 'ir_r' 237 | ir_name = 'ir_l' if '01' in annotator else 'ir_r' # HACK 238 | filename = f"{self._output_dir}/{self._sequence_id:04d}_{ir_name}.png" 239 | self.backend.write_image(filename, rgb2gray(data[annotator]).astype(np.uint8)) 240 | # print(f"rendered ir {self._sequence_id:04d}") 241 | self._sequence_id += 1 242 | self._frame_id += 1 243 | 244 | def on_final_frame(self): 245 | self._frame_id = 0 246 | self._sequence_id = self._start_sequence_id 247 | 248 | 249 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from utils.camera import DepthCamera, RGBDCamera, Realsense 3 | 4 | from diffusers import DDPMScheduler, HeunDiscreteScheduler, EulerDiscreteScheduler, DDIMScheduler 5 | from core.scheduler_ddpm import MyDDPMScheduler 6 | from core.scheduler_ddim import MyDDIMScheduler 7 | from typing import List, Union, Optional, Tuple 8 | from omegaconf import MISSING, OmegaConf 9 | from omegaconf import DictConfig, OmegaConf, ValidationError 10 | from hydra.core.config_store import ConfigStore 11 | 12 | supported_samplers = { 13 | 'ddpm': DDPMScheduler, 14 | 'euler': EulerDiscreteScheduler, 15 | 'heun': HeunDiscreteScheduler, 16 | 'ddim': DDIMScheduler, 17 | 'my_ddim': MyDDIMScheduler, 18 | 'my_ddpm': MyDDPMScheduler 19 | } 20 | 21 | @dataclass 22 | class Augment: 23 | resizedcrop: dict = field(default_factory=lambda: { 24 | 'scale': [2, 2], 25 | 'ratio': [1.33333333333333,1.33333333333333333333] 26 | }) 27 | hflip: str = "h" # off 28 | #==== raft stereo augmentation ====# 29 
| min_scale: float = 0 # -0.2 30 | max_scale: float = 0 # 0.4 31 | saturation_range: List[float] = field(default_factory=lambda: [0, 1.4]) 32 | gamma: List[float] = field(default_factory=lambda: [1,1,1,1]) 33 | yjitter: bool =False 34 | 35 | @dataclass 36 | class TrainingConfig: 37 | name: Optional[str] = "your task name here" 38 | tag: str = "" # your tag here 39 | camera_resolution: str = "320x256" # "224x128" # WxH dataset camera resolution, default "640x360" 40 | image_size: Tuple[int] = field(default_factory=lambda: (256, 320)) # (128, 224) #(352, 640) # [h,w] training image size 41 | divis_by: int = 32 42 | # image_size: tuple = (126, 224) # (128, 224) #(352, 640) # [h,w] training image size 43 | depth_channels: int = 1 44 | cond_channels: str = "rgb" # "rgb+raw" # "left+right" # "rgb+left+right" # "left+right+raw" # "left+right+raw" 45 | train_batch_size: int = 12 # 16 46 | eval_batch_size: int = 12 47 | eval_num_batch: int = 2 # if set to -1, will evaluate whole val set 48 | 49 | num_epochs: int = 1000 50 | gradient_accumulation_steps: int = 3 51 | clip_grad_norm: float = 1.0 52 | 53 | lr_warmup_steps: int = 500 54 | val_every_global_steps: int = 1000 55 | save_model_epochs: int = 10 56 | mixed_precision: str = "no" # `no` for float32, `fp16` for automatic mixed precision 57 | 58 | push_to_hub: bool = False # whether to upload the saved model to the HF Hub 59 | hub_model_id: str = "/" # the name of the repository to create on the HF Hub 60 | hub_private_repo: bool = False 61 | overwrite_output_dir: bool = True # overwrite the old model when re-running the notebook 62 | # seed: int = 0 63 | 64 | train_dataset: List[str] = field(default_factory=lambda: ['NYUv2']) #"std_100k" # 65 | eval_dataset: List[str] = field(default_factory=lambda: ['NYUv2']) #"std_100k" # 66 | dataset_weight: List[int] = field(default_factory=lambda: [1]) 67 | dataset_variant: str = "default" 68 | 69 | #### training settings 70 | ldm: bool = True 71 | prediction_space: str = "depth" # or "disp" ? 72 | ssi: bool = False 73 | # data normalizer 74 | normalize_mode: str = "average" 75 | num_chs: int = 3 76 | ch_bounds: List[float] = field(default_factory=lambda: [256, 256, 256])#[64, 64, 128] 77 | ch_gammas: List[float] = field(default_factory=lambda: [1/3., 1/3., 1/3. 
])#[1., 1/3, 1/3] 78 | norm_t: float = 0.5 79 | norm_s: float = 2.0 80 | 81 | num_train_timesteps: int = 128 #1000 # diff-11 82 | num_inference_timesteps: int = 128 #1000 # diff-11 83 | num_inference_rounds: int = 1 84 | noise_strategy: str = 'randn' # ['randn', 'pyramid'] 85 | loss_type: str = "l1" # "mse" 86 | learning_rate: float = 1e-4 87 | clip_gradient: bool = False 88 | 89 | #### scheduler 90 | clip_sample: bool = True 91 | clip_sample_range: float = 1.0 92 | thresholding: bool = False 93 | dynamic_thresholding_ratio: float = 0.995 94 | num_cycles: int = 1 95 | beta_schedule: str = "squaredcos_cap_v2" # "linear" 96 | beta_start: float = 1e-4 97 | beta_end: float = 2e-2 98 | noise_rgb: bool = False 99 | 100 | sampler: str = "my_ddpm" 101 | prediction_type: str = "v_prediction" # "sample" # "epsilon" # 102 | 103 | #### guidance settings 104 | flow_guidance_weights: List[float] = field(default_factory=lambda: [0.0]) 105 | perturb_start_ratio: float = 1.0 # @deprecated 106 | guide_source: Optional[Union[str, None]] = None # "raw|stereo-match" 107 | flow_guidance_mode: str = "imputation" 108 | 109 | #### evaluation settings 110 | eval_output: str = "" 111 | eval_split: str = "val" # "test" 112 | write_pcd: bool = False 113 | num_intermediate_images: int = 8 114 | plot_mask: bool = False 115 | plot_error_map: bool = True 116 | plot_denoised_images: bool = True 117 | plot_intermediate_images: bool = False 118 | plot_intermediate_metrics: bool = False 119 | experiment_dir: str = "experiments" 120 | safe_ssi: bool = False # do ransac when align scales, only valid when ssi is on, should be turn off when training 121 | ransac_error_threshold: float = 0.6 # squared error, 0.6 works for nyu 122 | ensemble: bool = False 123 | coarse_to_fine: bool = False 124 | 125 | #### resume checkpoints 126 | resume_pretrained: Optional[str] = "" 127 | resume_ckpt: Optional[str] = "" 128 | 129 | #### experiment output directory, will be overriden automatically 130 | output_dir: Optional[str] = "" 131 | 132 | augment: Augment=field(default_factory=Augment) #Augment= MISSING # 133 | 134 | ### networks 135 | block_out_channels: Tuple[int] = field(default_factory=lambda: (128, 128, 256, 256, 512, 512)) 136 | lr_scheduler: Optional[str] = "cosine" 137 | 138 | @dataclass 139 | class Config: 140 | debug: bool = False 141 | seed: int = -1 142 | task: TrainingConfig = MISSING 143 | 144 | def setup_hydra_configurations(): 145 | # setup hydra configurations 146 | cs = ConfigStore.instance() 147 | cs.store(name="base_config", node=Config) 148 | 149 | cs = ConfigStore.instance() 150 | cs.store( 151 | group="task", 152 | name="cfg", 153 | node=TrainingConfig 154 | ) 155 | 156 | def get_output_dir(base_config: Config): 157 | config = base_config.task 158 | ssi = "ssi" if config.ssi else "nossi" 159 | datasets = "_".join(config.train_dataset) 160 | weights = "_".join(format(x, ".1f") for x in config.flow_guidance_weights) 161 | tag = "" if config.tag=="" else f"-{config.tag}" 162 | 163 | return f"{config.experiment_dir}/{config.name}{tag}.dep{config.depth_channels}.lr{config.learning_rate:.0e}.{config.prediction_type}.{ssi}.{config.beta_schedule}.{config.noise_strategy}." + \ 164 | f"{config.sampler}{config.num_train_timesteps}." + \ 165 | f"{datasets}.{config.image_size[0]}x{config.image_size[1]}.{config.cond_channels}." 
+ \ 166 | f"w{weights}" + ("_debug" if base_config.debug else "") 167 | 168 | def set_debug(config: TrainingConfig): 169 | config.val_every_global_steps = 10 #1000# 170 | config.save_model_epochs = 1 171 | config.train_batch_size = 1 172 | config.eval_batch_size = 1 173 | config.beta_schedule = "linear" 174 | config.beta_start = 1e-4 175 | config.beta_end = 2e-1 176 | # config.dataset = "nyu_depth_v2" # "std_debug" #720x360 177 | config.num_train_timesteps = 128 # 128# 178 | config.num_inference_timesteps = 128 # 128# 179 | config.num_intermediate_images = 4 180 | # config.output_dir = f"{config.output_dir}_debug" 181 | 182 | def create_sampler(config, train=True): 183 | if config.sampler not in supported_samplers.keys(): 184 | raise ValueError("Sampler not found") 185 | 186 | opt = { 187 | "num_train_timesteps": config.num_train_timesteps if train else config.num_inference_timesteps 188 | } 189 | 190 | if train: 191 | assert "ddim" not in config.sampler, "DDIM should not be used for training" 192 | 193 | opt["clip_sample"] = config.clip_sample 194 | opt["prediction_type"] = config.prediction_type 195 | opt["beta_schedule"] = config.beta_schedule 196 | opt["beta_start"] = config.beta_start 197 | opt["beta_end"] = config.beta_end 198 | opt["num_train_timesteps"] = config.num_train_timesteps 199 | 200 | if config.sampler == "my_ddpm" or config.sampler == "ddpm": 201 | opt["clip_sample_range"] = config.clip_sample_range 202 | opt["thresholding"] = config.thresholding 203 | opt["dynamic_thresholding_ratio"] = config.dynamic_thresholding_ratio 204 | elif config.sampler == "my_ddim" or config.sampler == "ddim": 205 | opt["set_alpha_to_one"] = False 206 | opt["skip_prk_steps"] = True 207 | opt["steps_offset"] = 1 208 | opt["trained_betas"] = None 209 | else: 210 | raise ValueError("Sampler may not be configured properly?!") 211 | 212 | return supported_samplers[config.sampler].from_config(opt) 213 | 214 | ########### TESTING BELOW, INGNORE ############# 215 | 216 | def plot_iddpm_figure_1(): 217 | def distortion(delta, sqared_err): 218 | # return ( math.log(1/math.sqrt(2*math.pi)) - math.log(delta) - 0.5 * sqared_err / delta**2) 219 | log_scales = th.FloatTensor([0.5 * math.log(delta)]) # 0.5 * log_variance 220 | centered_x = 0.95/256/256 221 | x = th.FloatTensor([0.5]) 222 | 223 | inv_stdv = th.exp(-log_scales) 224 | plus_in = inv_stdv * (centered_x + 1.0 / 255.0) 225 | cdf_plus = approx_standard_normal_cdf(plus_in) 226 | min_in = inv_stdv * (centered_x - 1.0 / 255.0) 227 | cdf_min = approx_standard_normal_cdf(min_in) 228 | log_cdf_plus = th.log(cdf_plus.clamp(min=1e-12)) 229 | log_one_minus_cdf_min = th.log((1.0 - cdf_min).clamp(min=1e-12)) 230 | cdf_delta = cdf_plus - cdf_min 231 | log_probs = th.where( 232 | x < -0.999, 233 | log_cdf_plus, 234 | th.where(x > 0.999, log_one_minus_cdf_min, th.log(cdf_delta.clamp(min=1e-12))), 235 | ) 236 | assert log_probs.shape == x.shape 237 | return log_probs 238 | 239 | # config.set_debug() 240 | T = 4000 241 | config.num_train_timesteps = config.num_inference_timesteps = T 242 | config.beta_schedule = "squaredcos_cap_v2" 243 | scheduler = create_sampler(config) 244 | print(distortion(scheduler.betas[0], 0.95**2)) 245 | print(normal_kl(scheduler.alphas_cumprod[-1]**0.5*2, math.log(1-scheduler.alphas_cumprod[-1]), 0, 0)) # Section 4 ddpm: keep SNR at X_T ~= 1e-5 (=10**-5) 246 | 247 | vlb = [] 248 | for t in range(T): 249 | vlb.append( 250 | normal_kl(scheduler.alphas_cumprod[t]**0.5*2, math.log(1-scheduler.alphas_cumprod[t]), 0, 0) 251 | ) 252 | 253 | x = 
np.linspace(0, 1, T) 254 | y = scheduler.betas_tilde / scheduler.betas 255 | plt.plot(x, y, label="4000") 256 | 257 | T = 1000 258 | config.num_train_timesteps = config.num_inference_timesteps = T 259 | scheduler = create_sampler(config) 260 | print(distortion(scheduler.betas[0], 0.95**2)) 261 | x2 = np.linspace(0, 1, T) 262 | y2 = scheduler.betas_tilde / scheduler.betas 263 | 264 | plt.plot(x2, y2, label="1000") 265 | print(normal_kl(scheduler.alphas_cumprod[-1]**0.5, math.log(1-scheduler.alphas_cumprod[-1]), 0, 0)) # Section 4 ddpm: keep SNR at X_T ~= 1e-5 (=10**-5) 266 | 267 | T = 128 268 | config.num_train_timesteps = config.num_inference_timesteps = T 269 | scheduler = create_sampler(config) 270 | alphas_cumprod_128 = scheduler.alphas_cumprod 271 | print(distortion(scheduler.betas[0], 0.95**2)) 272 | x3 = np.linspace(0, 1, T) 273 | y3 = scheduler.betas_tilde / scheduler.betas 274 | plt.plot(x3, y3, label="128") 275 | plt.xlabel("t/T") 276 | plt.ylabel("~beta_t/beta_t") 277 | plt.legend(loc="upper right") 278 | plt.savefig("Figure 1.png") # Figure 1 in iDDPM 279 | print(normal_kl(scheduler.alphas_cumprod[-1], math.log(1-scheduler.alphas_cumprod[-1]), 0, 0)) # Section 4 ddpm: keep SNR at X_T ~= 1e-5 (=10**-5) 280 | 281 | def plot_iddpm_figure_2(): 282 | T = 128 283 | config.num_train_timesteps = config.num_inference_timesteps = T 284 | scheduler = create_sampler(config) 285 | x = np.linspace(0, 1, T) 286 | vlbs = [] 287 | for t in range(T): 288 | vlbs.append( 289 | normal_kl(0, math.log(1-scheduler.alphas_cumprod[t]), 0, 0) 290 | ) 291 | 292 | def plot_iddpm_figure_5(): 293 | T = 1000 294 | config.num_train_timesteps = config.num_inference_timesteps = T 295 | config.beta_schedule = "linear" 296 | scheduler = create_sampler(config) 297 | alphas_cumprod_linear = scheduler.alphas_cumprod 298 | 299 | T = 1000 300 | config.num_train_timesteps = config.num_inference_timesteps = T 301 | config.beta_schedule = "squaredcos_cap_v2" 302 | scheduler = create_sampler(config) 303 | alphas_cumprod_cosine = scheduler.alphas_cumprod 304 | 305 | x = np.linspace(0, 1, T) 306 | plt.figure() 307 | plt.plot(x, alphas_cumprod_linear, label="linear") 308 | plt.plot(x, alphas_cumprod_cosine, label="cosine") 309 | plt.legend(loc="upper right") 310 | plt.xlabel("diffusion step t/T") 311 | plt.ylabel("alpha bar") 312 | plt.savefig("Figure 5.png") 313 | 314 | def plot_snr(): 315 | T = 128 316 | config.num_train_timesteps = T 317 | config.beta_schedule = "linear" 318 | scheduler = create_sampler(config) 319 | plt.figure() 320 | 321 | x = np.linspace(0, T, T) 322 | snr_linear = scheduler.alphas_cumprod / ( 1-scheduler.alphas_cumprod) 323 | # plt.plot(x, snr_linear, label="SNR Linear") 324 | plt.plot(x, snr_linear ** 0.5, label="sqrt SNR Linear") 325 | # plt.plot(x, th.log(snr_linear), label="log SNR Linear") 326 | 327 | config.beta_schedule = "squaredcos_cap_v2" 328 | scheduler = create_sampler(config) 329 | 330 | x = np.linspace(0, T, T) 331 | snr_cosine = scheduler.alphas_cumprod / ( 1-scheduler.alphas_cumprod) 332 | # plt.plot(x, snr_cosine, label="SNR cosine") 333 | plt.plot(x, snr_cosine ** 0.5, label="sqrt SNR cosine") 334 | # plt.plot(x, th.log(snr_cosine), label="log SNR cosine") 335 | plt.xlabel("t/T") 336 | plt.ylabel("SNR") 337 | plt.legend(loc="upper right") 338 | plt.savefig("Figure_SNR.png") 339 | 340 | def plot_sample_t(): 341 | T = 128 342 | config.num_train_timesteps = T 343 | config.beta_schedule = "squaredcos_cap_v2" 344 | scheduler = create_sampler(config) 345 | snr_cosine = 
scheduler.alphas_cumprod / ( 1-scheduler.alphas_cumprod) 346 | from core.resample import create_named_schedule_sampler 347 | t_sampler = create_named_schedule_sampler("snr", (snr_cosine ** 0.5 + 1).cpu().numpy()) 348 | timestemps, weights = t_sampler.sample(128, "cpu") 349 | # print(timestemps, weights) 350 | plt.figure() 351 | fig, axs = plt.subplots(1, 2, sharey=True, tight_layout=True) 352 | axs[0].hist(timestemps, bins=T) 353 | axs[1].hist(weights, bins=T) 354 | # print(weights.mean()) 355 | plt.savefig("Figure_sampled_t.png") 356 | 357 | if __name__ == "__main__": # DEBUG & PLOT schdulers 358 | config = TrainingConfig() 359 | 360 | from utils.losess import * 361 | 362 | import matplotlib.pyplot as plt 363 | import numpy as np 364 | import torch as th 365 | import math 366 | 367 | # plot_iddpm_figure_1() 368 | # plot_iddpm_figure_2() 369 | # plot_iddpm_figure_5() 370 | plot_snr() 371 | plot_sample_t() 372 | 373 | """ 374 | # resolution is irrelanvent for predicting depth 375 | print(config.camera.resolution_str) 376 | print(config.camera.resolution) 377 | print(config.camera.fxb) 378 | 379 | fxb = config.camera.fxb #* 2.5 380 | disp = fxb / 0.75 381 | print(disp) 382 | disp_2 = fxb / (0.75 + 0.001) 383 | print(disp_2 - disp) 384 | print(f"{(disp_2 - disp) / disp * 100} %") """ 385 | 386 | 387 | 388 | 389 | 390 | 391 | -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import numpy as np 5 | from functools import partial 6 | from utils.camera import Realsense 7 | 8 | def denormalize(config, pred_disps, raw_disp=None, mask=None): 9 | from utils.utils import Normalizer 10 | norm = Normalizer.from_config(config) 11 | 12 | if config.ssi: 13 | # assert config.depth_channels == 1, "fixme" 14 | B, R, H, W = pred_disps.shape 15 | # scale-shift invariant evaluation, consider using config.safe_ssi if the ssi computation is not stable 16 | batch_pred = pred_disps.reshape(-1, H*W) # BR, HW 17 | batch_gt = raw_disp.repeat(1, R, 1, 1).reshape(-1, H*W) # BR, HW 18 | batch_mask = mask.repeat(1, R, 1, 1).reshape(-1, H*W) 19 | if config.safe_ssi: 20 | from utils.ransac import RANSAC 21 | regressor = RANSAC(n=0.1, k=10, d=0.2, t=config.ransac_error_threshold) 22 | regressor.fit(batch_pred, batch_gt, batch_mask) 23 | st = regressor.best_fit 24 | print(f"safe ssi in on: n=0.1, k=10, d=0.2, t={config.ransac_error_threshold}") 25 | else: 26 | print("directly compute ssi") 27 | from utils.utils import compute_scale_and_shift 28 | st = compute_scale_and_shift(batch_pred, batch_gt, batch_mask) # BR, HW 29 | 30 | s, t = torch.split(st.view(B, R, 1, 2), 1, dim=-1) 31 | pred_disps_unnormalized = pred_disps * s + t 32 | else: 33 | pred_disps_unnormalized = norm.denormalize(pred_disps) 34 | 35 | return pred_disps_unnormalized 36 | 37 | class D3RoMa(): 38 | def __init__(self, overrides=[], camera=None, variant="left+right+raw"): 39 | assert variant in ["left+right+raw", "rgb+raw"], "not released yet" 40 | 41 | from config import TrainingConfig, setup_hydra_configurations 42 | self.camera: Realsense = camera 43 | 44 | setup_hydra_configurations() 45 | from hydra import compose, initialize 46 | with initialize(version_base=None, config_path="conf", job_name="inference"): 47 | base_cfg = compose(config_name="config.yaml", overrides=overrides) 48 | 49 | if base_cfg.seed != -1: 50 | from utils.utils import seed_everything 51 | 
seed_everything(base_cfg.seed) # for reproducing 52 | 53 | config: TrainingConfig = base_cfg.task 54 | self.camera.change_resolution(f"{config.image_size[1]}x{config.image_size[0]}") 55 | self.pipeline = self._load_pipeline(config) 56 | 57 | self.eval_output_dir = f"_outputs.{variant}" 58 | if not os.path.exists(self.eval_output_dir): 59 | os.makedirs(self.eval_output_dir, exist_ok=True) 60 | 61 | from utils.utils import Normalizer 62 | self.normer = Normalizer.from_config(config) 63 | self.config = config 64 | self.variant = variant 65 | 66 | def _load_pipeline(self, config): 67 | patrained_path = f"{config.resume_pretrained}" 68 | if os.path.exists(patrained_path): 69 | print(f"load weights from {patrained_path}") 70 | 71 | from core.custom_pipelines import GuidedDiffusionPipeline, GuidedLatentDiffusionPipeline 72 | clazz_pipeline = GuidedLatentDiffusionPipeline if config.ldm else GuidedDiffusionPipeline 73 | pipeline = clazz_pipeline.from_pretrained(patrained_path).to("cuda") 74 | # model = UNet2DConditionModel.from_pretrained(patrained_path) 75 | pipeline.guidance.flow_guidance_mode=config.flow_guidance_mode 76 | 77 | if config.sampler == "my_ddim": 78 | from core.scheduler_ddim import MyDDIMScheduler 79 | my_ddim = MyDDIMScheduler.from_config(dict( 80 | beta_schedule = config.beta_schedule, 81 | beta_start = config.beta_start, 82 | beta_end = config.beta_end, 83 | clip_sample = config.clip_sample, 84 | num_train_timesteps = config.num_train_timesteps, 85 | prediction_type = config.prediction_type, 86 | set_alpha_to_one = False, 87 | skip_prk_steps = True, 88 | steps_offset = 1, 89 | trained_betas = None 90 | )) 91 | pipeline.scheduler = my_ddim 92 | print(f"Careful! sampler is overriden to {config.sampler}") 93 | else: 94 | raise ValueError(f"patrained path not exists: {patrained_path}") 95 | 96 | return pipeline 97 | 98 | @torch.no_grad() 99 | def infer_with_rgb_raw(self, rgb: np.ndarray, raw_depth: np.ndarray): 100 | """Depth restoration with RGB and raw depth (RGB and depth SHOULD be aligned!) 
101 | 102 | Args: 103 | rgb (np.ndarray): RGB image or gray image 104 | raw (np.ndarray): raw depth image from camera sensors, unit is meter 105 | 106 | Returns: 107 | np.ndarray: restored depth image, unit is meter 108 | """ 109 | 110 | assert rgb.dtype == np.uint8 111 | if len(rgb.shape[:2]) != len(raw_depth.shape[:2]): 112 | rgb = cv2.resize(rgb, dsize=raw_depth.shape[:2][::-1], interpolation=cv2.INTER_LINEAR) 113 | 114 | if len(rgb.shape) == 2: 115 | # grayscale images 116 | rgb = np.tile(rgb[...,None], (1, 1, 3)) 117 | else: 118 | rgb = rgb[..., :3] 119 | 120 | rgb = cv2.resize(rgb, self.camera.resolution[::-1], interpolation=cv2.INTER_LINEAR) 121 | rgb = torch.from_numpy(rgb).permute(2, 0, 1).float() 122 | 123 | if len(raw_depth.shape) == 2: 124 | raw_depth = raw_depth[...,None] 125 | raw_depth = torch.from_numpy(raw_depth).permute(2, 0, 1).float() 126 | 127 | assert self.config.prediction_space == "disp", "not implemented" 128 | raw_disp = torch.zeros_like(raw_depth) 129 | raw_valid = (raw_depth > 0) 130 | raw_disp[raw_valid] = self.camera.fxb_depth / raw_depth[raw_valid] 131 | 132 | # normalized_raw_disp = self.normer.normalize(raw_disp)[0] 133 | return self.run_pipeline(None, None, raw_disp, rgb) 134 | 135 | @torch.no_grad() 136 | def infer(self, left: np.ndarray, right: np.ndarray, raw_depth: np.ndarray=None, rgb:np.ndarray=None): 137 | """Depth restoration with left, right and raw depth 138 | 139 | Args: 140 | left (np.ndarray): left (IR) image 141 | right (np.ndarray): right (IR) image 142 | raw (np.ndarray): raw depth image from camera sensors, unit is meter (optional) 143 | rgb (np.ndarray): RGB image (optional) for point cloud visualization only 144 | 145 | Returns: 146 | np.ndarray: restored depth image, unit is meter 147 | """ 148 | assert len(left.shape) == len(right.shape) 149 | assert left.dtype == right.dtype == np.uint8 150 | 151 | if raw_depth is None or rgb is None: 152 | raise NotImplementedError("no worry, i will implement this soon") 153 | 154 | # assert raw.dtype == np.float32 155 | # if len(raw.shape) == 2: 156 | # raw = raw[...,None] 157 | 158 | if len(left.shape) == 2: 159 | # grayscale images 160 | left = np.tile(left[...,None], (1, 1, 3)) 161 | right = np.tile(right[...,None], (1, 1, 3)) 162 | else: 163 | left = left[..., :3] 164 | right = right[..., :3] 165 | 166 | left = cv2.resize(left, self.camera.resolution[::-1], interpolation=cv2.INTER_LINEAR) 167 | right = cv2.resize(right, self.camera.resolution[::-1], interpolation=cv2.INTER_LINEAR) 168 | 169 | left = torch.from_numpy(left).permute(2, 0, 1).float() 170 | right = torch.from_numpy(right).permute(2, 0, 1).float() 171 | 172 | if rgb is not None: 173 | rgb = cv2.resize(rgb, self.camera.resolution[::-1], interpolation=cv2.INTER_LINEAR) 174 | rgb = torch.from_numpy(rgb).permute(2, 0, 1).float() 175 | 176 | raw_depth = cv2.resize(raw_depth, dsize=self.camera.resolution[::-1], interpolation=cv2.INTER_NEAREST) 177 | if len(raw_depth.shape) == 3 and raw_depth.shape[-1] == 3: 178 | raw_depth = raw_depth [...,0] 179 | if len(raw_depth.shape) == 2: 180 | raw_depth = raw_depth[...,None] 181 | raw_depth = torch.from_numpy(raw_depth).permute(2, 0, 1).float() 182 | 183 | assert self.config.prediction_space == "disp", "not implemented" 184 | raw_disp = torch.zeros_like(raw_depth) 185 | raw_valid = (raw_depth > 0) 186 | raw_disp[raw_valid] = self.camera.fxb_depth / raw_depth[raw_valid] 187 | 188 | assert left.shape[1] % 8 == 0 and left.shape[2] % 8 == 0, "image size must be multiple of 8" 189 | return 
self.run_pipeline(left, right, raw_disp, rgb) 190 | 191 | def run_pipeline(self, left_image, right_image, raw_disp, rgb): 192 | device = "cuda" if torch.cuda.is_available() else "cpu" # "cpu" # 193 | normalize_rgb_fn = lambda x: (x / 255. - 0.5) * 2 194 | 195 | # batchify 196 | if rgb is not None: 197 | normalized_rgb = normalize_rgb_fn(rgb).to(device) 198 | normalized_rgb = normalized_rgb.unsqueeze(0).repeat(self.config.num_inference_rounds, 1, 1, 1) 199 | 200 | if left_image is not None and right_image is not None: 201 | left_image = normalize_rgb_fn(left_image).to(device) 202 | right_image = normalize_rgb_fn(right_image).to(device) 203 | 204 | left_image = left_image.unsqueeze(0).repeat(self.config.num_inference_rounds, 1, 1, 1) 205 | right_image = right_image.unsqueeze(0).repeat(self.config.num_inference_rounds, 1, 1, 1) 206 | 207 | raw_disp = raw_disp.to(device) 208 | normalized_raw_disp = self.normer.normalize(raw_disp)[0] # normalized sim disp 209 | normalized_raw_disp = normalized_raw_disp.unsqueeze(0).repeat(self.config.num_inference_rounds, 1, 1, 1) 210 | 211 | raw_disp = raw_disp.unsqueeze(0).repeat(self.config.num_inference_rounds, 1, 1, 1) 212 | mask = (raw_disp > 0).float() 213 | 214 | denorm = partial(denormalize, self.config) 215 | self.pipeline.set_progress_bar_config(desc=f"Denoising") 216 | 217 | pred_disps = self.pipeline(normalized_rgb, left_image, right_image, normalized_raw_disp, raw_disp, mask, 218 | num_inference_steps=self.config.num_inference_timesteps, 219 | num_intermediate_images=self.config.num_intermediate_images, # T 220 | add_noise_rgb=self.config.noise_rgb, 221 | depth_channels=self.config.depth_channels, 222 | cond_channels=self.config.cond_channels, 223 | denorm = denorm 224 | ).images 225 | 226 | if pred_disps.shape[0] > 1: # B is actually num_inference_rounds 227 | uncertainties = np.zeros_like(raw_disp) 228 | uncertainties[mask] = np.std(pred_disps.cpu().numpy(), axis=0)[mask] 229 | else: 230 | uncertainties = None 231 | 232 | pred_disps_unnormalized = denormalize(self.config, pred_disps, raw_disp, mask) 233 | pred_disps_unnormalized = pred_disps_unnormalized.mean(dim=0) 234 | 235 | if True: 236 | from utils.utils import compute_errors, metrics_to_dict, pretty_json 237 | metrics = compute_errors(raw_disp[0].cpu().numpy(), 238 | pred_disps_unnormalized.cpu().numpy(), 239 | self.config.prediction_space, 240 | mask[0].cpu().numpy().astype(bool), 241 | [self.camera.fxb_depth]) 242 | 243 | metrics = metrics_to_dict(*metrics) 244 | print((f"metrics:{pretty_json(metrics)}")) 245 | 246 | pred_disps_unnormalized = pred_disps_unnormalized[0].cpu().numpy() 247 | pred_depth = np.zeros_like(pred_disps_unnormalized) 248 | pred_mask = (pred_disps_unnormalized > 0) 249 | pred_depth[pred_mask] = self.camera.fxb_depth / pred_disps_unnormalized[pred_mask] 250 | return pred_depth 251 | 252 | 253 | if __name__ == "__main__": 254 | from utils.camera import Realsense 255 | camera = Realsense.default_real("fxm") 256 | overrides = [ 257 | # uncomment if you choose variant left+right+raw 258 | # "task=eval_ldm_mixed", 259 | # "task.resume_pretrained=experiments/ldm_sf-mixed.dep4.lr3e-05.v_prediction.nossi.scaled_linear.randn.nossi.my_ddpm1000.SceneFlow_Dreds_HssdIsaacStd.180x320.cond7-raw+left+right.w0.0/epoch_0199", 260 | 261 | # uncomment if you choose variant rgb+raw 262 | "task=eval_ldm_mixed_rgb+raw", 263 | 
"task.resume_pretrained=experiments/ldm_sf-241212.2.dep4.lr3e-05.v_prediction.nossi.scaled_linear.randn.ddpm1000.Dreds_HssdIsaacStd_ClearPose.180x320.rgb+raw.w0.0/epoch_0056", 264 | 265 | # rest of the configurations 266 | "task.eval_num_batch=1", 267 | "task.image_size=[360,640]", 268 | "task.eval_batch_size=1", 269 | "task.num_inference_rounds=1", 270 | "task.num_inference_timesteps=10", "task.num_intermediate_images=5", 271 | "task.write_pcd=true" 272 | ] 273 | """ if False: # turn on guidance 274 | overrides += [ 275 | "task.sampler=my_ddim", 276 | "task.guide_source=raw-depth", 277 | "task.flow_guidance_mode=gradient", 278 | "task.flow_guidance_weights=[1.0]" 279 | ] """ 280 | 281 | droma = D3RoMa(overrides, camera, variant="rgb+raw") 282 | 283 | from PIL import Image 284 | from hydra.utils import to_absolute_path 285 | left = np.array(Image.open(to_absolute_path("./assets/examples/0000_ir_l.png"))) 286 | right = np.array(Image.open(to_absolute_path("./assets/examples/0000_ir_r.png"))) 287 | raw = np.array(Image.open(to_absolute_path("./assets/examples/0000_depth.png"))) * 1e-3 288 | rgb = np.array(Image.open(to_absolute_path("./assets/examples/0000_rgb.png"))) 289 | 290 | if droma.variant == "rgb+raw": 291 | depth_aligned = camera.transform_depth_to_rgb_frame(raw) #if not alreay aligned 292 | if True: # visualize aligned depth for realsense d415 293 | valid = (depth_aligned > 0.2) & (depth_aligned < 5) 294 | import matplotlib.pyplot as plt 295 | cmap_spectral = plt.get_cmap('Spectral') 296 | raw_depth_normalized = np.zeros_like(depth_aligned) 297 | raw_depth_normalized[valid] = (depth_aligned[valid] - depth_aligned[valid].min()) / (depth_aligned[valid].max() - depth_aligned[valid].min()) 298 | Image.fromarray((cmap_spectral(raw_depth_normalized)*255.)[...,:3].astype(np.uint8)).save(f"raw_aligned.png") 299 | 300 | pred_depth = droma.infer_with_rgb_raw(rgb, depth_aligned) 301 | # if droma.config.write_pcd: 302 | elif droma.variant == "left+right+raw": 303 | pred_depth = droma.infer(left, right, raw, rgb) 304 | else: 305 | raise NotImplementedError 306 | 307 | import matplotlib.pyplot as plt 308 | cmap_spectral = plt.get_cmap('Spectral') 309 | pred_depth_normalized = (pred_depth - pred_depth.min()) / (pred_depth.max() - pred_depth.min()) 310 | Image.fromarray((cmap_spectral(pred_depth_normalized)*255.)[...,:3].astype(np.uint8)).save(f"{droma.eval_output_dir}/pred.png") 311 | 312 | if droma.config.write_pcd: 313 | from utils.utils import viz_cropped_pointcloud 314 | gt_depth_np = raw # [H,W] 315 | gt_masks_np = raw > 0 316 | gt_depth_np[~gt_masks_np] = 0.0 317 | gt_depth_np = camera.transform_depth_to_rgb_frame(gt_depth_np) #if not alreay aligned 318 | viz_cropped_pointcloud(camera.K.arr, rgb, gt_depth_np, fname=f"{droma.eval_output_dir}/raw.ply") 319 | 320 | if droma.variant == "left+right+raw": 321 | pred_depth = camera.transform_depth_to_rgb_frame(pred_depth) 322 | viz_cropped_pointcloud(camera.K.arr, rgb, pred_depth, fname=f"{droma.eval_output_dir}/pred.ply") 323 | -------------------------------------------------------------------------------- /isaacsim/replicate/std_object.py: -------------------------------------------------------------------------------- 1 | import os, random, time, json, math, copy 2 | import numpy as np 3 | 4 | import omni 5 | import omni.replicator.core as rep 6 | from omni.isaac.core.utils import prims 7 | from omni.isaac.core.prims.rigid_prim import RigidPrim 8 | from omni.isaac.core.utils.rotations import euler_angles_to_quat 9 | from 
omni.isaac.core.utils.stage import get_current_stage, open_stage, create_new_stage 10 | from pxr import Gf, Sdf, Usd, PhysxSchema, UsdGeom, UsdLux, UsdPhysics, UsdShade 11 | 12 | from replicate.scene_replicator import Replicator 13 | from dreds_renderer import DredsRenderer, generate_material_type, g_synset_name_scale_pairs 14 | from utils_func import get_all_child_mesh, get_visibility_attribute 15 | 16 | scene_prim_path = "/World/scene" #!! 17 | 18 | class STDObjectReplicator(Replicator): 19 | 20 | def __init__(self, world, config) -> None: 21 | super().__init__(world, config) 22 | 23 | self.dr = {} 24 | 25 | def setup_domain_randomization(self): 26 | self.domain_randomization = self._config["domain_randomization"] 27 | assert self.domain_randomization, "not implemented yet!" 28 | 29 | # domain randomization of lighting 30 | light_type_dr = self._config["lighting"]["light_type"] 31 | self.light_type = light_type_dr[random.randint(0, len(light_type_dr))-1] 32 | light_conf_dr = self._config["lighting"][f"{self.light_type}_light"] 33 | 34 | self.dr['lighting'] = {} 35 | self.dr['lighting']['type'] = self.light_type 36 | 37 | light_conf = { 38 | 'radius': random.uniform(*light_conf_dr['radius']), 39 | 'height': random.uniform(*light_conf_dr['height']), 40 | 'intensity': [ 41 | random.uniform(*light_conf_dr['intensity']['on']), 42 | random.uniform(*light_conf_dr['intensity']['off']) 43 | ] 44 | } 45 | # self.dr['lighting'][f'{self.light_type}_light'] = light_conf 46 | self.dr['lighting'].update(light_conf) 47 | 48 | # scene disk light 49 | self._light = rep.create.light( 50 | light_type = self.light_type, #"Sphere", #"Disk", 51 | intensity = self.dr['lighting']["intensity"][0], 52 | color = (1.0, 1.0, 1.0), 53 | position = (0.0, 0.0, 0.0), 54 | name= f"{self.light_type}Light" 55 | ) 56 | 57 | # prim_path_disk = "/Replicator/DiskLight_Xform/DiskLight" 58 | # rect_light = self._world.stage.GetPrimAtPath(prim_path_disk) 59 | # rect_light.GetAttribute("inputs:radius").Set(self._config["lighting"]["disk_light"]["radius"]) 60 | 61 | prim_path_light = f"/Replicator/{self.light_type}Light_Xform/{self.light_type}Light" 62 | prim_light = self._world.stage.GetPrimAtPath(prim_path_light) 63 | prim_light.GetAttribute("inputs:radius").Set(self.dr["lighting"]["radius"]) 64 | 65 | if self.dr["lighting"]["type"] == "Sphere": 66 | prim_light.GetAttribute("treatAsPoint").Set(True) 67 | 68 | # domain randomization of materials 69 | self.dr["std"] = {} 70 | transparent_dr = self._config["transparent"] 71 | transparent_conf = { 72 | "roughness_constant": random.uniform(*transparent_dr["roughness_constant"]), 73 | "cutout_opacity": random.uniform(*transparent_dr["cutout_opacity"]), 74 | "thin_walled": transparent_dr["thin_walled"], 75 | "glass_ior": random.uniform(*transparent_dr["glass_ior"]), 76 | "frosting_roughness": random.uniform(*transparent_dr["frosting_roughness"]) 77 | } 78 | self.dr["std"]["transparent"] = transparent_conf 79 | 80 | specular_dr = self._config["specular"] 81 | specular_conf = { 82 | "reflection_roughness_constant": random.uniform(*specular_dr["reflection_roughness_constant"]), 83 | "metallic_constant": random.uniform(*specular_dr["metallic_constant"]), 84 | "reflection_color": random.uniform(*specular_dr["reflection_color"]), 85 | } 86 | self.dr["std"]["specular"] = specular_conf 87 | return self.dr 88 | 89 | def render(self) -> None: 90 | self._log("start std_obj render on surface") 91 | 92 | surface_config = self._config["hssd"]['surface'] 93 | origin_prim_path = 
surface_config['prim_path'] 94 | prim_path = origin_prim_path.replace("/World", scene_prim_path) 95 | surface_prim = self._world.stage.GetPrimAtPath(prim_path) 96 | self.enable_physics(surface_prim) 97 | 98 | surface_center_pos = self.calc_surface_center(surface_prim) 99 | # move disk light 1m above the surface center 100 | # self._light.GetAttribute("xformOp:translate").Set((surface_center_pos[0], surface_center_pos[1], surface_center_pos[2] + 1.0)) 101 | with self._light: 102 | rep.modify.pose(position=(surface_center_pos[0], 103 | surface_center_pos[1], 104 | surface_center_pos[2] + self.dr["lighting"]["height"])) 105 | 106 | # domain randomization 107 | root_dir = os.path.abspath(self._config.dreds.cad_model_dir) 108 | renderer = DredsRenderer(root_dir) 109 | select_model_list, cam_q_list, cam_p_list = renderer.domain_randomize(self._config["num_frames_per_surface"]) 110 | surface_center_pos = self.calc_surface_center(surface_prim) 111 | 112 | # load object 113 | all_rigid_objects = [] 114 | # last_object_name = None 115 | # model_prims = {} 116 | # material_prims = [] 117 | initial_materials = {} 118 | for model in select_model_list: 119 | prim_name = f"model_{model['instance_id']}_{model['class_name']}" 120 | self._log(f"{model['material_type']}, {model['class_name']}, {model['instance_path']}") 121 | 122 | model_prim = prims.create_prim( 123 | prim_path=f"/World/{model['class_name']}_{model['instance_id']}", 124 | usd_path=f"file://{model['instance_path']}", 125 | semantic_label=prim_name, 126 | scale=[model['scale']]*3 127 | ) 128 | # Wrap the prim into a rigid prim to be able to simulate it 129 | box_rigid_prim = RigidPrim( 130 | prim_path=str(model_prim.GetPrimPath()), 131 | name=model['instance_name'], 132 | position=surface_center_pos + Gf.Vec3d(random.uniform(-0.3, 0.3), random.uniform(-0.3, 0.3), model['instance_id'] * 0.05), 133 | orientation=euler_angles_to_quat([random.uniform(0, math.pi/2), random.uniform(0, math.pi/2), random.uniform(0, math.pi)]), 134 | ) 135 | # set object as rigid body 136 | box_rigid_prim.enable_rigid_body_physics() 137 | # Enable collision 138 | UsdPhysics.CollisionAPI.Apply(model_prim) 139 | # Register rigid prim with the scene 140 | self._world.scene.add(box_rigid_prim) 141 | # last_object_name = model['instance_name'] 142 | all_rigid_objects.append(model['instance_name']) 143 | # model_prims[model['instance_id']] = model_prim 144 | 145 | # disable opacity for ground truth depth rendering, tested in PathRendering mode. 
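            # Forcing enable_opacity to False on each child mesh below makes transparent
            # objects render as solid surfaces, so the ground-truth depth pass records their
            # geometry instead of the background behind them. This assumes the bound material
            # is an OmniPBR/MDL-style shader exposing an "enable_opacity" input, which is the
            # input created on the shader a few lines further down.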
146 | for prim in get_all_child_mesh(model_prim): 147 | cur_mat, _ = UsdShade.MaterialBindingAPI(prim).ComputeBoundMaterial() 148 | shader = UsdShade.Shader(omni.usd.get_shader_from_material(cur_mat, get_prim=True)) 149 | 150 | shader.CreateInput("enable_opacity", Sdf.ValueTypeNames.Bool) 151 | shader.GetInput("enable_opacity").Set(False) 152 | 153 | # change material 154 | 155 | if model["material_type"] == "transparent" or model['class_name'] in ["cup", "bottle"]: # hack transparent cup and bottle 156 | mat_type = model["material_type"] 157 | MDL = "OmniGlass.mdl" 158 | mtl_name, _ = os.path.splitext(MDL) 159 | MAT_PATH = "/World/Looks" 160 | 161 | prim_path = omni.usd.get_stage_next_free_path(self._world.stage, f"{MAT_PATH}/{mtl_name}", False) 162 | mat = self.create_omnipbr_material(mtl_url=MDL, mtl_name=mtl_name, mtl_path=prim_path) 163 | 164 | initial_materials[model_prim] = mat 165 | # material_prims.append(prim_path) 166 | 167 | elif model["material_type"] == "specular": 168 | mat_type = model["material_type"] 169 | for prim in get_all_child_mesh(model_prim): 170 | 171 | if len(prim.GetChildren()) >1 : 172 | # hot fix: multi-materials 173 | self._log(f"multi-materials: {prim.GetPath()}") 174 | for subp in prim.GetChildren(): 175 | mat, _ = UsdShade.MaterialBindingAPI(subp).ComputeBoundMaterial() 176 | shader = UsdShade.Shader(omni.usd.get_shader_from_material(mat, get_prim=False)) 177 | 178 | shader.CreateInput("metallic", Sdf.ValueTypeNames.Float) 179 | shader.CreateInput("roughness", Sdf.ValueTypeNames.Float) 180 | 181 | shader.GetInput("metallic").Set(self.dr["std"]["specular"]["metallic_constant"]) 182 | shader.GetInput("roughness").Set(self.dr["std"]["specular"]["reflection_roughness_constant"]) 183 | continue 184 | 185 | cur_mat, _ = UsdShade.MaterialBindingAPI(prim).ComputeBoundMaterial() 186 | shader = UsdShade.Shader(omni.usd.get_shader_from_material(cur_mat, get_prim=True)) 187 | 188 | # Add value inputs 189 | shader.CreateInput("diffuse_color_constant", Sdf.ValueTypeNames.Color3f) 190 | shader.CreateInput("reflection_roughness_constant", Sdf.ValueTypeNames.Float) 191 | shader.CreateInput("metallic_constant", Sdf.ValueTypeNames.Float) 192 | 193 | # Add texture inputs 194 | shader.CreateInput("diffuse_texture", Sdf.ValueTypeNames.Asset) 195 | shader.CreateInput("reflectionroughness_texture", Sdf.ValueTypeNames.Asset) 196 | shader.CreateInput("metallic_texture", Sdf.ValueTypeNames.Asset) 197 | 198 | # Add other attributes 199 | shader.CreateInput("project_uvw", Sdf.ValueTypeNames.Bool) 200 | 201 | # Add texture scale and rotate 202 | shader.CreateInput("texture_scale", Sdf.ValueTypeNames.Float2) 203 | shader.CreateInput("texture_rotate", Sdf.ValueTypeNames.Float) 204 | 205 | shader.GetInput("metallic_constant").Set(self.dr["std"]["specular"]["metallic_constant"]) 206 | shader.GetInput("reflection_roughness_constant").Set(self.dr["std"]["specular"]["reflection_roughness_constant"]) 207 | 208 | UsdShade.MaterialBindingAPI(prim).Bind(cur_mat, UsdShade.Tokens.strongerThanDescendants) 209 | 210 | elif model["material_type"] == "diffuse": 211 | mat_type = model["material_type"] 212 | pass 213 | 214 | # randomize camera 215 | surface_center = self.calc_surface_center(surface_prim) 216 | self.rep_randomize_camera(None, surface_center, cam_p_list, cam_q_list) 217 | 218 | # output_dir = f"{self.output_dir}/{self._config["hssd"]['name']}/{surface_config['category']}" 219 | # os.makedirs(output_dir, exist_ok=True) 220 | # self.writer._output_dir = output_dir 221 | # output_dir = 
self._config.writer_config.output_dir 222 | with open(f"{self.output_dir}/meta_{self.next_seq_id}.json", 'w') as f: 223 | meta = { 224 | "models": select_model_list, 225 | "domain_randomization": self.dr 226 | } 227 | f.write(json.dumps(meta, indent=4, sort_keys=True)) 228 | 229 | # replicate texture 230 | # self.randomize_texture(model_prims) 231 | # Setup the writer 232 | 233 | cfg = copy.deepcopy(self._config["writer_config"]) 234 | cfg["output_dir"] = self.output_dir 235 | cfg["start_sequence_id"] = self.next_seq_id 236 | 237 | _config = copy.copy(self._config) 238 | _config["writer_config"]["output_dir"] = self.output_dir 239 | _config["writer_config"]["start_sequence_id"] = self.next_seq_id 240 | 241 | # self._config["writer_config"]["output_dir"] 242 | 243 | resolution = np.array(self._config["depth_sensor"]["resolution"]).astype(np.uint32).tolist() 244 | dep_res = (resolution[0], resolution[1]) 245 | self.writer_gt = rep.WriterRegistry.get("GtWriter") 246 | self.writer_gt.initialize(ticker=self.ticker, depth_sensor_cfg=_config["depth_sensor"], **_config["writer_config"]) 247 | cam_gt_rp = rep.create.render_product(self.cam_rgb, dep_res, name="CameraDepth") 248 | self.writer_gt.attach([cam_gt_rp]) 249 | 250 | # start simulation 251 | self._world.reset() 252 | 253 | if self._config["render_after_quiet"]: 254 | # wait for objects to fall 255 | # last_box = self._world.scene.get_object(last_object_name) 256 | max_tried = 0 257 | while True and max_tried < 10: 258 | max_sim_steps = 250 259 | for i in range(max_sim_steps): 260 | self._world.step(render=False) 261 | quited = True 262 | for rigid_object in all_rigid_objects: 263 | obj = self._world.scene.get_object(rigid_object) 264 | if obj is None: 265 | self._log(f"{rigid_object} is not found!") 266 | continue 267 | if np.linalg.norm(obj.get_linear_velocity()) > 0.001: 268 | quited = False 269 | break 270 | if quited: 271 | self._log("all objects quited") 272 | break # stop physics simulation, start rendering 273 | max_tried += 1 274 | self._log("still waiting for objects to fall") 275 | 276 | rep.settings.set_render_rtx_realtime() 277 | start_time = time.time() 278 | # rep.orchestrator.run_until_complete(num_frames=2*self._config['num_frames_per_surface']) 279 | for _ in range(2*self._config['num_frames_per_surface']): 280 | self._writer_tick = "gt" 281 | if _ % 2 == 0: 282 | self._step_tick += 1 283 | rep.orchestrator.step(rt_subframes=self._config['rt_subframes'], pause_timeline=True) 284 | 285 | end_time = time.time() 286 | 287 | # log running time 288 | runtime = end_time - start_time 289 | fps = runtime / self._config['num_frames_per_surface'] 290 | self._log(f"Replicator finished in {round(runtime, 2)} seconds, FPS={round(fps, 2)}") 291 | 292 | # change materials 293 | for model_prim_, mat_ in initial_materials.items(): 294 | UsdShade.MaterialBindingAPI(model_prim_).Bind(mat_, UsdShade.Tokens.strongerThanDescendants) 295 | 296 | self.writer_gt.detach() 297 | 298 | self.writer_rgb = rep.WriterRegistry.get("ColorWriter") 299 | self.writer_rgb.initialize(ticker=self.ticker, **cfg) 300 | cam_rgb_rp = rep.create.render_product(self.cam_rgb, resolution, name="CameraRGB") 301 | self.writer_rgb.attach([cam_rgb_rp]) 302 | 303 | self.writer_ir = rep.WriterRegistry.get("IRWriter") 304 | self.writer_ir.initialize(output_dir = self.output_dir, start_sequence_id = self.next_seq_id, ticker=self.ticker) 305 | cam_left_ir_rp = rep.create.render_product(self.cam_ir_left, resolution, name="Camera01") 306 | cam_right_ir_rp = 
rep.create.render_product(self.cam_ir_right, resolution, name="Camera02") 307 | self.writer_ir.attach([cam_left_ir_rp, cam_right_ir_rp]) 308 | 309 | if self._config["launch_config"]["renderer"] == "PathTracing": # hack 310 | rep.settings.set_render_pathtraced() 311 | start_time = time.time() 312 | # rep.orchestrator.run_until_complete(num_frames=2*self._config['num_frames_per_surface']) 313 | for _ in range(2*self._config['num_frames_per_surface']): 314 | if _ % 2 == 0: 315 | self._writer_tick = "rgb" 316 | else: 317 | self._writer_tick = "ir" 318 | self._step_tick += 1 319 | rep.orchestrator.step(rt_subframes=self._config['rt_subframes'], pause_timeline=True) 320 | end_time = time.time() 321 | runtime = end_time - start_time 322 | fps = runtime / self._config['num_frames_per_surface'] 323 | self._log(f"Replicator finished in {round(runtime, 2)} seconds, FPS={round(fps, 2)}") -------------------------------------------------------------------------------- /data/augmentor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import warnings 4 | import os 5 | import time 6 | from glob import glob 7 | from skimage import color, io 8 | from PIL import Image 9 | 10 | import cv2 11 | cv2.setNumThreads(0) 12 | cv2.ocl.setUseOpenCL(False) 13 | 14 | import torch 15 | from torchvision.transforms import ColorJitter, functional, Compose 16 | import torch.nn.functional as F 17 | 18 | def get_middlebury_images(): 19 | root = "datasets/Middlebury/MiddEval3" 20 | with open(os.path.join(root, "official_train.txt"), 'r') as f: 21 | lines = f.read().splitlines() 22 | return sorted([os.path.join(root, 'trainingQ', f'{name}/im0.png') for name in lines]) 23 | 24 | def get_eth3d_images(): 25 | return sorted(glob('datasets/ETH3D/two_view_training/*/im0.png')) 26 | 27 | def get_kitti_images(): 28 | return sorted(glob('datasets/KITTI/training/image_2/*_10.png')) 29 | 30 | def transfer_color(image, style_mean, style_stddev): 31 | reference_image_lab = color.rgb2lab(image) 32 | reference_stddev = np.std(reference_image_lab, axis=(0,1), keepdims=True)# + 1 33 | reference_mean = np.mean(reference_image_lab, axis=(0,1), keepdims=True) 34 | 35 | reference_image_lab = reference_image_lab - reference_mean 36 | lamb = style_stddev/reference_stddev 37 | style_image_lab = lamb * reference_image_lab 38 | output_image_lab = style_image_lab + style_mean 39 | l, a, b = np.split(output_image_lab, 3, axis=2) 40 | l = l.clip(0, 100) 41 | output_image_lab = np.concatenate((l,a,b), axis=2) 42 | with warnings.catch_warnings(): 43 | warnings.simplefilter("ignore", category=UserWarning) 44 | output_image_rgb = color.lab2rgb(output_image_lab) * 255 45 | return output_image_rgb 46 | 47 | class AdjustGamma(object): 48 | 49 | def __init__(self, gamma_min, gamma_max, gain_min=1.0, gain_max=1.0): 50 | self.gamma_min, self.gamma_max, self.gain_min, self.gain_max = gamma_min, gamma_max, gain_min, gain_max 51 | 52 | def __call__(self, sample): 53 | gain = random.uniform(self.gain_min, self.gain_max) 54 | gamma = random.uniform(self.gamma_min, self.gamma_max) 55 | return functional.adjust_gamma(sample, gamma, gain) 56 | 57 | def __repr__(self): 58 | return f"Adjust Gamma {self.gamma_min}, ({self.gamma_max}) and Gain ({self.gain_min}, {self.gain_max})" 59 | 60 | class FlowAugmentor: 61 | def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=True, yjitter=False, saturation_range=[0.6,1.4], gamma=[1,1,1,1], stretch=False): 62 | 63 | # spatial augmentation 
params 64 | self.crop_size = crop_size 65 | self.min_scale = min_scale 66 | self.max_scale = max_scale 67 | if stretch: 68 | self.spatial_aug_prob = 1.0 69 | self.stretch_prob = 0.8 70 | self.max_stretch = 0.2 71 | else: 72 | self.spatial_aug_prob = 0.0 73 | self.stretch_prob = 0.0 74 | self.max_stretch = 0.0 75 | 76 | # flip augmentation params 77 | self.yjitter = yjitter 78 | self.do_flip = do_flip 79 | self.h_flip_prob = 0.5 80 | self.v_flip_prob = 0.1 81 | 82 | # photometric augmentation params 83 | self.photo_aug = Compose([ColorJitter(brightness=0.4, contrast=0.4, saturation=saturation_range, hue=0.5/3.14), AdjustGamma(*gamma)]) 84 | self.asymmetric_color_aug_prob = 0.2 85 | self.eraser_aug_prob = 0.5 86 | 87 | def color_transform(self, img1, img2): 88 | """ Photometric augmentation """ 89 | 90 | # asymmetric 91 | if np.random.rand() < self.asymmetric_color_aug_prob: 92 | img1 = np.array(self.photo_aug(Image.fromarray(img1)), dtype=np.uint8) 93 | img2 = np.array(self.photo_aug(Image.fromarray(img2)), dtype=np.uint8) 94 | 95 | # symmetric 96 | else: 97 | image_stack = np.concatenate([img1, img2], axis=0) 98 | image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8) 99 | img1, img2 = np.split(image_stack, 2, axis=0) 100 | 101 | return img1, img2 102 | 103 | def eraser_transform(self, img1, img2, bounds=[50, 100]): 104 | """ Occlusion augmentation """ 105 | 106 | ht, wd = img1.shape[:2] 107 | if np.random.rand() < self.eraser_aug_prob: 108 | mean_color = np.mean(img2.reshape(-1, 3), axis=0) 109 | for _ in range(np.random.randint(1, 3)): 110 | x0 = np.random.randint(0, wd) 111 | y0 = np.random.randint(0, ht) 112 | dx = np.random.randint(bounds[0], bounds[1]) 113 | dy = np.random.randint(bounds[0], bounds[1]) 114 | img2[y0:y0+dy, x0:x0+dx, :] = mean_color 115 | 116 | return img1, img2 117 | 118 | def resize_sparse_flow_map(self, flow, valid, fx=1.0, fy=1.0): 119 | ht, wd = flow.shape[:2] 120 | coords = np.meshgrid(np.arange(wd), np.arange(ht)) 121 | coords = np.stack(coords, axis=-1) 122 | 123 | coords = coords.reshape(-1, 2).astype(np.float32) 124 | flow = flow.reshape(-1, 2).astype(np.float32) 125 | valid = valid.reshape(-1).astype(np.float32) 126 | 127 | coords0 = coords[valid>=1] 128 | flow0 = flow[valid>=1] 129 | 130 | ht1 = int(round(ht * fy)) 131 | wd1 = int(round(wd * fx)) 132 | 133 | coords1 = coords0 * [fx, fy] 134 | flow1 = flow0 * [fx, fy] 135 | 136 | xx = np.round(coords1[:,0]).astype(np.int32) 137 | yy = np.round(coords1[:,1]).astype(np.int32) 138 | 139 | v = (xx > 0) & (xx < wd1) & (yy > 0) & (yy < ht1) 140 | xx = xx[v] 141 | yy = yy[v] 142 | flow1 = flow1[v] 143 | 144 | flow_img = np.zeros([ht1, wd1, 2], dtype=np.float32) 145 | valid_img = np.zeros([ht1, wd1], dtype=np.int32) 146 | 147 | flow_img[yy, xx] = flow1 148 | valid_img[yy, xx] = 1 149 | 150 | return flow_img, valid_img 151 | 152 | def spatial_transform(self, img1, img2, flow, sim_flow, sim_valid): 153 | # randomly sample scale 154 | ht, wd = img1.shape[:2] 155 | min_scale = np.maximum( 156 | (self.crop_size[0] + 8) / float(ht), 157 | (self.crop_size[1] + 8) / float(wd)) 158 | 159 | scale = 2 ** np.random.uniform(self.min_scale, self.max_scale) 160 | scale_x = scale 161 | scale_y = scale 162 | if np.random.rand() < self.stretch_prob: 163 | scale_x *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch) 164 | scale_y *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch) 165 | 166 | scale_x = np.clip(scale_x, min_scale, None) 167 | scale_y = np.clip(scale_y, min_scale, 
None) 168 | 169 | if np.random.rand() < self.spatial_aug_prob: 170 | # rescale the images 171 | img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) 172 | img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) 173 | flow = cv2.resize(flow, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) 174 | flow = flow * [scale_x, scale_y] 175 | 176 | sim_flow, sim_valid = self.resize_sparse_flow_map(sim_flow, sim_valid, fx=scale_x, fy=scale_y) 177 | 178 | if self.do_flip: 179 | if np.random.rand() < self.h_flip_prob and self.do_flip == 'hf': # h-flip 180 | img1 = img1[:, ::-1] 181 | img2 = img2[:, ::-1] 182 | flow = flow[:, ::-1] * [-1.0, 1.0] 183 | sim_flow = sim_flow[:, ::-1] * [-1.0, 1.0] 184 | 185 | if np.random.rand() < self.h_flip_prob and self.do_flip == 'h': # h-flip for stereo 186 | tmp = img1[:, ::-1] 187 | img1 = img2[:, ::-1] 188 | img2 = tmp 189 | 190 | if np.random.rand() < self.v_flip_prob and self.do_flip == 'v': # v-flip 191 | img1 = img1[::-1, :] 192 | img2 = img2[::-1, :] 193 | flow = flow[::-1, :] * [1.0, -1.0] 194 | sim_flow = sim_flow[::-1, :] * [1.0, -1.0] 195 | 196 | if self.yjitter: 197 | y0 = np.random.randint(2, img1.shape[0] - self.crop_size[0] - 2) 198 | x0 = np.random.randint(2, img1.shape[1] - self.crop_size[1] - 2) 199 | 200 | y1 = y0 + np.random.randint(-2, 2 + 1) 201 | img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 202 | img2 = img2[y1:y1+self.crop_size[0], x0:x0+self.crop_size[1]] 203 | flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 204 | sim_flow = sim_flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 205 | sim_valid = sim_valid[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 206 | 207 | else: 208 | y0 = 0 if img1.shape[0] == self.crop_size[0] else np.random.randint(0, img1.shape[0] - self.crop_size[0]) 209 | x0 = 0 if img1.shape[1] == self.crop_size[1] else np.random.randint(0, img1.shape[1] - self.crop_size[1]) 210 | 211 | img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 212 | img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 213 | flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 214 | sim_flow = sim_flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 215 | sim_valid = sim_valid[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 216 | 217 | return img1, img2, flow, sim_flow, sim_valid 218 | 219 | 220 | def __call__(self, img1, img2, flow, sim_flow, sim_valid): 221 | img1, img2 = self.color_transform(img1, img2) 222 | img1, img2 = self.eraser_transform(img1, img2) 223 | img1, img2, flow, sim_flow, sim_valid = self.spatial_transform(img1, img2, flow, sim_flow, sim_valid) 224 | 225 | img1 = np.ascontiguousarray(img1) 226 | img2 = np.ascontiguousarray(img2) 227 | flow = np.ascontiguousarray(flow) 228 | sim_flow = np.ascontiguousarray(sim_flow) 229 | sim_valid = np.ascontiguousarray(sim_valid) 230 | 231 | return img1, img2, flow, sim_flow, sim_valid 232 | 233 | class SparseFlowAugmentor: 234 | def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=False, yjitter=False, saturation_range=[0.7,1.3], gamma=[1,1,1,1]): 235 | # spatial augmentation params 236 | self.crop_size = crop_size 237 | self.min_scale = min_scale 238 | self.max_scale = max_scale 239 | self.spatial_aug_prob = 0.8 240 | self.stretch_prob = 0.8 241 | self.max_stretch = 0.2 242 | 243 | # flip augmentation params 244 | self.do_flip = do_flip 245 | self.h_flip_prob = 0.5 246 | self.v_flip_prob = 0.1 247 | 248 | # photometric augmentation 
params 249 | self.photo_aug = Compose([ColorJitter(brightness=0.3, contrast=0.3, saturation=saturation_range, hue=0.3/3.14), AdjustGamma(*gamma)]) 250 | self.asymmetric_color_aug_prob = 0.2 251 | self.eraser_aug_prob = 0.5 252 | 253 | def color_transform(self, img1, img2): 254 | image_stack = np.concatenate([img1, img2], axis=0) 255 | image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8) 256 | img1, img2 = np.split(image_stack, 2, axis=0) 257 | return img1, img2 258 | 259 | def eraser_transform(self, img1, img2): 260 | ht, wd = img1.shape[:2] 261 | if np.random.rand() < self.eraser_aug_prob: 262 | mean_color = np.mean(img2.reshape(-1, 3), axis=0) 263 | for _ in range(np.random.randint(1, 3)): 264 | x0 = np.random.randint(0, wd) 265 | y0 = np.random.randint(0, ht) 266 | dx = np.random.randint(50, 100) 267 | dy = np.random.randint(50, 100) 268 | img2[y0:y0+dy, x0:x0+dx, :] = mean_color 269 | 270 | return img1, img2 271 | 272 | def resize_sparse_flow_map(self, flow, valid, sim_flow, sim_valid, fx=1.0, fy=1.0): 273 | ht, wd = flow.shape[:2] 274 | coords = np.meshgrid(np.arange(wd), np.arange(ht)) 275 | coords = np.stack(coords, axis=-1) 276 | 277 | coords = coords.reshape(-1, 2).astype(np.float32) 278 | flow = flow.reshape(-1, 2).astype(np.float32) 279 | valid = valid.reshape(-1).astype(np.float32) 280 | 281 | sim_flow = sim_flow.reshape(-1, 2).astype(np.float32) 282 | sim_valid = sim_valid.reshape(-1).astype(np.float32) 283 | 284 | coords0 = coords[valid>=1] 285 | flow0 = flow[valid>=1] 286 | 287 | coords0_sim = coords[sim_valid>=1] 288 | flow0_sim = sim_flow[sim_valid>=1] 289 | 290 | ht1 = int(round(ht * fy)) 291 | wd1 = int(round(wd * fx)) 292 | 293 | coords1 = coords0 * [fx, fy] 294 | flow1 = flow0 * [fx, fy] 295 | 296 | coords1_sim = coords0_sim * [fx, fy] 297 | flow1_sim = flow0_sim * [fx, fy] 298 | 299 | xx = np.round(coords1[:,0]).astype(np.int32) 300 | yy = np.round(coords1[:,1]).astype(np.int32) 301 | 302 | xx_sim = np.round(coords1_sim[:,0]).astype(np.int32) 303 | yy_sim = np.round(coords1_sim[:,1]).astype(np.int32) 304 | 305 | v = (xx > 0) & (xx < wd1) & (yy > 0) & (yy < ht1) 306 | xx = xx[v] 307 | yy = yy[v] 308 | flow1 = flow1[v] 309 | 310 | v_sim = (xx_sim > 0) & (xx_sim < wd1) & (yy_sim > 0) & (yy_sim < ht1) 311 | xx_sim = xx_sim[v_sim] 312 | yy_sim = yy_sim[v_sim] 313 | flow1_sim = flow1_sim[v_sim] 314 | 315 | flow_img = np.zeros([ht1, wd1, 2], dtype=np.float32) 316 | valid_img = np.zeros([ht1, wd1], dtype=np.int32) 317 | 318 | sim_flow_img = np.zeros([ht1, wd1, 2], dtype=np.float32) 319 | sim_valid_img = np.zeros([ht1, wd1], dtype=np.int32) 320 | 321 | flow_img[yy, xx] = flow1 322 | valid_img[yy, xx] = 1 323 | 324 | sim_flow_img[yy_sim, xx_sim] = flow1_sim 325 | sim_valid_img[yy_sim, xx_sim] = 1 326 | 327 | return flow_img, valid_img, sim_flow_img, sim_valid_img 328 | 329 | def spatial_transform(self, img1, img2, flow, valid, sim_flow, sim_valid): 330 | # randomly sample scale 331 | 332 | ht, wd = img1.shape[:2] 333 | min_scale = np.maximum( 334 | (self.crop_size[0]) / float(ht), #+1 335 | (self.crop_size[1]) / float(wd)) #+1 336 | 337 | scale = 2 ** np.random.uniform(self.min_scale, self.max_scale) # default [0.87 ~ 1.32] 338 | scale_x = np.clip(scale, min_scale, None) 339 | scale_y = np.clip(scale, min_scale, None) 340 | 341 | if True or np.random.rand() < self.spatial_aug_prob: 342 | # rescale the images 343 | img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) 344 | img2 = cv2.resize(img2, None, fx=scale_x, 
fy=scale_y, interpolation=cv2.INTER_LINEAR) 345 | flow, valid, sim_flow, sim_valid = self.resize_sparse_flow_map(flow, valid, sim_flow, sim_valid, fx=scale_x, fy=scale_y) 346 | 347 | if self.do_flip: 348 | rand1 = np.random.rand() 349 | if rand1 < self.h_flip_prob and self.do_flip == 'hf': # h-flip 350 | img1 = img1[:, ::-1] 351 | img2 = img2[:, ::-1] 352 | flow = flow[:, ::-1] * [-1.0, 1.0] 353 | sim_flow = sim_flow[:, ::-1] * [-1.0, 1.0] 354 | 355 | rand2 = np.random.rand() 356 | if rand2 < self.h_flip_prob and self.do_flip == 'h': # h-flip for stereo 357 | tmp = img1[:, ::-1] 358 | img1 = img2[:, ::-1] 359 | img2 = tmp 360 | 361 | rand3 = np.random.rand() 362 | if rand3 < self.v_flip_prob and self.do_flip == 'v': # v-flip 363 | img1 = img1[::-1, :] 364 | img2 = img2[::-1, :] 365 | flow = flow[::-1, :] * [1.0, -1.0] 366 | sim_flow = sim_flow[::-1, :] * [1.0, -1.0] 367 | 368 | margin_y = 20 369 | margin_x = 50 370 | 371 | y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0] + margin_y) 372 | x0 = np.random.randint(-margin_x, img1.shape[1] - self.crop_size[1] + margin_x) 373 | 374 | y0 = np.clip(y0, 0, img1.shape[0] - self.crop_size[0]) 375 | x0 = np.clip(x0, 0, img1.shape[1] - self.crop_size[1]) 376 | 377 | img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 378 | img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 379 | flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 380 | valid = valid[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 381 | sim_flow = sim_flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 382 | sim_valid = sim_valid[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] 383 | return img1, img2, flow, valid, sim_flow, sim_valid 384 | 385 | 386 | def __call__(self, img1, img2, flow, valid, sim_flow, sim_valid): 387 | img1, img2 = self.color_transform(img1, img2) 388 | img1, img2 = self.eraser_transform(img1, img2) 389 | img1, img2, flow, valid, sim_flow, sim_valid = self.spatial_transform(img1, img2, flow, valid, sim_flow, sim_valid) 390 | 391 | img1 = np.ascontiguousarray(img1) 392 | img2 = np.ascontiguousarray(img2) 393 | flow = np.ascontiguousarray(flow) 394 | valid = np.ascontiguousarray(valid) 395 | sim_flow = np.ascontiguousarray(sim_flow) 396 | sim_valid = np.ascontiguousarray(sim_valid) 397 | 398 | return img1, img2, flow, valid, sim_flow, sim_valid 399 | -------------------------------------------------------------------------------- /utils/frame_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | from os.path import * 4 | import re 5 | import json 6 | import imageio 7 | import os 8 | import cv2 9 | import torch 10 | import torch.nn.functional as F 11 | from scipy import interpolate 12 | 13 | cv2.setNumThreads(0) 14 | cv2.ocl.setUseOpenCL(False) 15 | 16 | TAG_CHAR = np.array([202021.25], np.float32) 17 | 18 | def readFlow(fn): 19 | """ Read .flo file in Middlebury format""" 20 | # Code adapted from: 21 | # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy 22 | 23 | # WARNING: this will work on little-endian architectures (eg Intel x86) only! 24 | # print 'fn = %s'%(fn) 25 | with open(fn, 'rb') as f: 26 | magic = np.fromfile(f, np.float32, count=1) 27 | if 202021.25 != magic: 28 | print('Magic number incorrect. 
Invalid .flo file') 29 | return None 30 | else: 31 | w = np.fromfile(f, np.int32, count=1) 32 | h = np.fromfile(f, np.int32, count=1) 33 | # print 'Reading %d x %d flo file\n' % (w, h) 34 | data = np.fromfile(f, np.float32, count=2*int(w)*int(h)) 35 | # Reshape data into 3D array (columns, rows, bands) 36 | # The reshape here is for visualization, the original code is (w,h,2) 37 | return np.resize(data, (int(h), int(w), 2)) 38 | 39 | def readPFM(file): 40 | file = open(file, 'rb') 41 | 42 | color = None 43 | width = None 44 | height = None 45 | scale = None 46 | endian = None 47 | 48 | header = file.readline().rstrip() 49 | if header == b'PF': 50 | color = True 51 | elif header == b'Pf': 52 | color = False 53 | else: 54 | raise Exception('Not a PFM file.') 55 | 56 | dim_match = re.match(rb'^(\d+)\s(\d+)\s$', file.readline()) 57 | if dim_match: 58 | width, height = map(int, dim_match.groups()) 59 | else: 60 | raise Exception('Malformed PFM header.') 61 | 62 | scale = float(file.readline().rstrip()) 63 | if scale < 0: # little-endian 64 | endian = '<' 65 | scale = -scale 66 | else: 67 | endian = '>' # big-endian 68 | 69 | data = np.fromfile(file, endian + 'f') 70 | shape = (height, width, 3) if color else (height, width) 71 | 72 | data = np.reshape(data, shape) 73 | data = np.flipud(data) 74 | return data 75 | 76 | def writePFM(file, array): 77 | import os 78 | assert type(file) is str and type(array) is np.ndarray and \ 79 | os.path.splitext(file)[1] == ".pfm" 80 | with open(file, 'wb') as f: 81 | H, W = array.shape 82 | headers = ["Pf\n", f"{W} {H}\n", "-1\n"] 83 | for header in headers: 84 | f.write(str.encode(header)) 85 | array = np.flip(array, axis=0).astype(np.float32) 86 | f.write(array.tobytes()) 87 | 88 | 89 | 90 | def writeFlow(filename,uv,v=None): 91 | """ Write optical flow to file. 92 | 93 | If v is None, uv is assumed to contain both u and v channels, 94 | stacked in depth. 95 | Original code by Deqing Sun, adapted from Daniel Scharstein. 
96 | """ 97 | nBands = 2 98 | 99 | if v is None: 100 | assert(uv.ndim == 3) 101 | assert(uv.shape[2] == 2) 102 | u = uv[:,:,0] 103 | v = uv[:,:,1] 104 | else: 105 | u = uv 106 | 107 | assert(u.shape == v.shape) 108 | height,width = u.shape 109 | f = open(filename,'wb') 110 | # write the header 111 | f.write(TAG_CHAR) 112 | np.array(width).astype(np.int32).tofile(f) 113 | np.array(height).astype(np.int32).tofile(f) 114 | # arrange into matrix form 115 | tmp = np.zeros((height, width*nBands)) 116 | tmp[:,np.arange(width)*2] = u 117 | tmp[:,np.arange(width)*2 + 1] = v 118 | tmp.astype(np.float32).tofile(f) 119 | f.close() 120 | 121 | 122 | def readFlowKITTI(filename): 123 | flow = cv2.imread(filename, cv2.IMREAD_ANYDEPTH|cv2.IMREAD_COLOR) 124 | flow = flow[:,:,::-1].astype(np.float32) 125 | flow, valid = flow[:, :, :2], flow[:, :, 2] 126 | flow = (flow - 2**15) / 64.0 127 | return flow, valid 128 | 129 | def readDispKITTI(filename): 130 | disp = cv2.imread(filename, cv2.IMREAD_ANYDEPTH) / 256.0 131 | valid = disp > 0.0 132 | return disp, valid 133 | 134 | # Method taken from /n/fs/raft-depth/RAFT-Stereo/datasets/SintelStereo/sdk/python/sintel_io.py 135 | def readDispSintelStereo(file_name): 136 | a = np.array(Image.open(file_name)) 137 | d_r, d_g, d_b = np.split(a, axis=2, indices_or_sections=3) 138 | disp = (d_r * 4 + d_g / (2**6) + d_b / (2**14))[..., 0] 139 | mask = np.array(Image.open(file_name.replace('disparities', 'occlusions'))) 140 | valid = ((mask == 0) & (disp > 0)) 141 | return disp, valid 142 | 143 | # Method taken from https://research.nvidia.com/sites/default/files/pubs/2018-06_Falling-Things/readme_0.txt 144 | def readDispFallingThings(file_name): 145 | a = np.array(Image.open(file_name)) 146 | with open('/'.join(file_name.split('/')[:-1] + ['_camera_settings.json']), 'r') as f: 147 | intrinsics = json.load(f) 148 | fx = intrinsics['camera_settings'][0]['intrinsic_settings']['fx'] 149 | disp = (fx * 6.0 * 100) / a.astype(np.float32) 150 | valid = disp > 0 151 | return disp, valid 152 | 153 | # Method taken from https://github.com/castacks/tartanair_tools/blob/master/data_type.md 154 | def readDispTartanAir(file_name): 155 | depth = np.load(file_name) 156 | disp = 80.0 / depth 157 | valid = disp > 0 158 | return disp, valid 159 | 160 | def readDispSTD_np(filename): 161 | disp = np.load(filename) 162 | valid = (disp > 0) & ~ np.isinf(disp) 163 | return disp, valid 164 | 165 | def readDispReal(camera, filename): 166 | """ 167 | read disparity either ground truth depth or simulated disparity 168 | resize here aligns the file resolution with desired camera resolution 169 | """ 170 | if not os.path.exists(filename): 171 | # hack: prevent dataset errors 172 | return np.ones(camera.resolution), np.ones(camera.resolution, dtype=bool), 0, 1 173 | 174 | ext = splitext(filename)[-1] 175 | if ext == ".png": 176 | data = cv2.imread(filename, cv2.IMREAD_ANYDEPTH) 177 | elif ext == ".npy": 178 | data = np.load(filename) 179 | elif ext == ".exr": 180 | data = cv2.imread(filename, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) 181 | if data is None: 182 | print(f"bug: {filename}") 183 | if len(data.shape) == 3 and data.shape[-1] == 3: 184 | data = data[...,0] 185 | else: 186 | raise NotImplementedError 187 | 188 | scale = data.shape[1] / camera.resolution[1] 189 | data = cv2.resize(data, dsize=camera.resolution[::-1], interpolation=cv2.INTER_NEAREST) 190 | valid = ~ np.isinf(data) & ~ np.isnan(data) & (data > 0) 191 | 192 | if "depth" in filename or "Depth" in filename: 193 | # depth = 
camera.transform_depth_to_rgb_frame(depth) #if not alreay aligned 194 | disp = np.zeros_like(data, dtype=np.float32) 195 | # FIXME: hack 196 | depth_unit = 1 197 | if camera.device == "fxm" or camera.device == "jav" or camera.device == "d435": 198 | depth_unit = 1e-3 199 | valid = valid & (data > 200) & (data < 3000) 200 | data = np.clip(data, a_min=0.0, a_max=3000) # only clip large depth values 201 | elif camera.device == "clearpose": 202 | depth_unit = 1e-3 203 | min_depth = camera.min_depth / depth_unit 204 | max_depth = camera.max_depth / depth_unit 205 | valid = valid & (data > min_depth ) & (data < max_depth) # [0.2~10] 206 | data = np.clip(data, a_min = 0.0, a_max = max_depth) # only clip large depth values 207 | 208 | disp[valid] = camera.fxb_depth / (data[valid] * depth_unit) 209 | else: 210 | # disparity scales with resolution 211 | disp = data / scale 212 | 213 | valid = (disp > camera.min_disp) & (disp < camera.max_disp) & valid 214 | # disp[valid] = np.clip(disp[valid], camera.min_disp, camera.max_disp) # DEBUG: * 1.333333 215 | # disp[~valid] = 0.0 216 | return disp, valid, camera.min_disp, camera.max_disp 217 | 218 | def readDispDreds_exr(camera, filename): 219 | depth = cv2.imread(filename, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) 220 | if len(depth.shape) == 3 and depth.shape[-1] == 3: 221 | depth = depth [...,0] 222 | 223 | if depth.shape[:2] != camera.resolution: 224 | # be very carefull here !!! only resize in depth space 225 | depth = cv2.resize(depth, dsize=camera.resolution[::-1], interpolation=cv2.INTER_NEAREST) # same with DREDS 226 | 227 | valid = (~ (np.isinf(depth) | np.isnan(depth))) & (depth > 0.2) & (depth < 2) 228 | disp = np.zeros_like(depth) 229 | disp[valid] = camera.fxb / depth[valid] 230 | # disp[valid] = np.clip(disp[valid], camera.min_disp, camera.max_disp) 231 | return disp, valid, camera.min_disp, camera.max_disp 232 | 233 | def readDispSTD_exr(filename): 234 | disp = cv2.imread(filename, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) 235 | valid = (~ (np.isinf(disp) | np.isnan(disp))) & (disp != 0) 236 | return disp, valid 237 | 238 | def readDispSTD(file_name): 239 | # depth_rgb = np.load(file_name) 240 | gt_depth = cv2.imread(str(file_name), cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) 241 | gt_depth = cv2.resize(gt_depth, (640*2, 360*2), interpolation=cv2.INTER_NEAREST) 242 | valid = ~ (np.isnan(gt_depth) | np.isinf(gt_depth)) 243 | gt_depth[~valid] = 0 244 | 245 | fx = 446.31 246 | focal_length = fx * 2 # original ir size 247 | baseline = 0.055 248 | T_lc = np.eye(4) # color to left ir 249 | T_lc[0,3] = -0.015 250 | H, W = 360*2, 640*2 251 | K = np.array([[fx*2, 0, W/2-0.5], [0, fx*2, H/2-0.5], [0, 0, 1]]) 252 | inv_K = np.linalg.inv(K) 253 | 254 | meshgrid = np.meshgrid(range(W), range(H), indexing='xy') 255 | id_coords = np.stack(meshgrid, axis=0).astype(np.float32) 256 | ones = np.ones((1, H * W), dtype=np.float32) 257 | pix_coords = np.concatenate((id_coords[0].reshape(1, -1), id_coords[1].reshape(1, -1), ones), axis=0) 258 | 259 | gt_depth = gt_depth.reshape(1, H*W) 260 | cam_points_ir = (inv_K @ pix_coords) * gt_depth 261 | valid_mask = cam_points_ir[2] > 0. 
# filter out invalid points 262 | 263 | cam_points_ir = cam_points_ir[:, valid_mask] 264 | cam_points_color = T_lc[:3,:3] @ cam_points_ir + T_lc[:3,3:] # convert to ir frame 265 | 266 | pix_coords_color = (K @ cam_points_color) # project to ir frame 267 | pix_coords_color[:2] /= pix_coords_color[2:3] # normalize 268 | 269 | ir_depth = np.zeros((H, W), dtype=np.float32)# * np.inf 270 | u, v = pix_coords_color[:2] 271 | u_left, u_right = np.floor(u).astype(np.uint32), np.ceil(u).astype(np.uint32) 272 | v_up, v_bottom = np.floor(v).astype(np.uint32), np.ceil(v).astype(np.uint32) 273 | 274 | def fill(depth_map, pred_depth, u, v): 275 | u, v = u.astype(np.uint32), v.astype(np.uint32) 276 | uv = np.vstack([u,v]) 277 | valid_color = (uv[0] >= 0) & (uv[0] < W) & (uv[1] >= 0) & (uv[1] < H) 278 | u, v = uv[:, valid_color] 279 | depth_map[v, u] = pred_depth[0, valid_mask][valid_color] 280 | 281 | # an ugly HACK 282 | fill(ir_depth, gt_depth, u_left, v_up) 283 | fill(ir_depth, gt_depth, u_left, v_bottom) 284 | fill(ir_depth, gt_depth, u_right, v_up) 285 | fill(ir_depth, gt_depth, u_right, v_bottom) 286 | 287 | uv = np.rint(pix_coords_color).astype(np.uint32) 288 | valid_color = (uv[0] >= 0) & (uv[0] < W) & (uv[1] >= 0) & (uv[1] < H) 289 | u, v = uv[:2, valid_color] 290 | ir_depth[v, u] = gt_depth[0, valid_mask][valid_color] 291 | 292 | # fill holes 293 | ir_depth_torch = torch.from_numpy(ir_depth).unsqueeze(0).unsqueeze(0) 294 | holes_mask = (ir_depth == 0) #np.isinf(ir_depth) # exclude occ-in/occ-out? 295 | holes_mask[:, -20:] = False # another ugly hack exclude the right 10 cols 296 | holes_coords = id_coords[:2, holes_mask][(1,0),:] 297 | holes_coords_normal = holes_coords / np.array(([[H],[W]])) * 2 - 1 298 | grid = torch.from_numpy(holes_coords_normal, ).transpose(1,0).reshape(1,1,-1,2) 299 | interp = F.grid_sample(ir_depth_torch, grid.to(torch.float32), mode='nearest', padding_mode='zeros') 300 | ir_depth[holes_mask] = interp[0,0,0,:].numpy() 301 | 302 | disp = np.zeros_like(ir_depth) 303 | valid = valid & (ir_depth > 0) 304 | disp[valid] = focal_length * baseline / ir_depth[valid] 305 | 306 | valid = disp > 0 307 | return disp, valid 308 | 309 | def readDispMiddlebury(file_name, extra_info=None): #, image_size 310 | import os 311 | if basename(file_name) == 'disp0GT.pfm': 312 | disp = readPFM(file_name).astype(np.float32) 313 | # disp = cv2.resize(disp, image_size[::-1], cv2.INTER_NEAREST) 314 | assert len(disp.shape) == 2 315 | nocc_pix = file_name.replace('disp0GT.pfm', 'mask0nocc.png') 316 | assert exists(nocc_pix) 317 | nocc_pix = imageio.imread(nocc_pix) == 255 318 | # nocc_pix = cv2.resize(nocc_pix, image_size[::-1], cv2.INTER_NEAREST) 319 | assert np.any(nocc_pix) 320 | calib_file = file_name.replace('disp0GT.pfm', 'calib.txt') 321 | if exists(calib_file): 322 | calib = {} 323 | with open(calib_file, "r") as f: 324 | # read line by line 325 | lines = f.readlines() 326 | for line in lines: 327 | name, var = line.partition("=")[::2] 328 | if name.startswith("cam"): 329 | # parse matlab mat? 
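                        # calib.txt stores the camera matrices in MATLAB-style syntax,
                        # e.g. cam0=[f 0 cx; 0 f cy; 0 0 1]; stripping the surrounding
                        # brackets, splitting rows on ';' and row entries on whitespace
                        # recovers a nested list of floats (Middlebury calibration format).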
330 | arr = var[1:-2].split(';') 331 | to_list = lambda str_arr: list(map(float, str_arr.strip().split(' '))) 332 | calib[name] = [to_list(a) for a in arr] 333 | else: 334 | calib[name] = eval(var) 335 | 336 | # convert disp to depth 337 | depth = np.zeros_like(disp) 338 | depth[nocc_pix] = calib['baseline'] * calib['cam0'][0][0] / (calib['doffs'] + disp[nocc_pix]) * 1e-3 # meter 339 | 340 | if os.path.exists(file_name.replace("disp0GT.pfm", "im0.png_flow_pred.npy")): 341 | raft_disp = np.load(file_name.replace("disp0GT.pfm", "im0.png_flow_pred.npy")) 342 | raw_depth = calib['baseline'] * calib['cam0'][0][0] / (calib['doffs'] + -raft_disp) * 1e-3 # meter 343 | else: 344 | raw_depth = depth 345 | return disp, nocc_pix, depth, np.array(calib["cam0"]), raw_depth 346 | 347 | return disp, nocc_pix, np.zeros_like(disp) 348 | 349 | elif basename(file_name) == 'disp0.pfm': 350 | disp = readPFM(file_name).astype(np.float32) 351 | valid = disp < 1e3 352 | return disp, valid 353 | 354 | def writeFlowKITTI(filename, uv): 355 | uv = 64.0 * uv + 2**15 356 | valid = np.ones([uv.shape[0], uv.shape[1], 1]) 357 | uv = np.concatenate([uv, valid], axis=-1).astype(np.uint16) 358 | cv2.imwrite(filename, uv[..., ::-1]) 359 | 360 | def read_sceneflow(resolution, file_name, pil=False): 361 | """ 362 | train sceneflow with different resolution 363 | resolution: HxW 364 | """ 365 | try: 366 | disp = np.array(read_gen(file_name, pil)).astype(np.float32) 367 | except: 368 | print(f"invalid ground truth file, {file_name}") 369 | 370 | assert len(disp.shape) == 2 371 | scale, min_disp, max_disp = 1., 0.5, 256. 372 | if resolution is not None and disp.shape != tuple(resolution): 373 | scale = disp.shape[0] / resolution[0] 374 | disp = cv2.resize(disp, resolution[::-1], cv2.INTER_NEAREST) #cv2.INTER_LINEAR 375 | disp = disp / scale 376 | max_disp = max_disp / scale 377 | min_disp = min_disp / scale 378 | return disp, (disp < max_disp) & (disp > min_disp), min_disp, max_disp 379 | 380 | def read_gen(file_name, pil=False): 381 | ext = splitext(file_name)[-1] 382 | if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg': 383 | return Image.open(file_name) 384 | elif ext == '.bin' or ext == '.raw': 385 | return np.load(file_name) 386 | elif ext == '.flo': 387 | return readFlow(file_name).astype(np.float32) 388 | elif ext == '.pfm': 389 | flow = readPFM(file_name).astype(np.float32) 390 | if len(flow.shape) == 2: 391 | return flow 392 | else: 393 | return flow[:, :, :-1] 394 | elif ext == ".npy": 395 | return np.load(file_name).astype(np.float32) 396 | elif ext == ".exr": 397 | return cv2.imread(file_name, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) 398 | return [] 399 | 400 | 401 | #https://stackoverflow.com/questions/37662180/interpolate-missing-values-2d-python 402 | def interpolate_missing_pixels( 403 | image: np.ndarray, 404 | mask: np.ndarray, 405 | method: str = 'nearest', 406 | fill_value: int = 0 407 | ): 408 | """ 409 | :param image: a 2D image 410 | :param mask: a 2D boolean image, True indicates missing values 411 | :param method: interpolation method, one of 412 | 'nearest', 'linear', 'cubic'. 413 | :param fill_value: which value to use for filling up data outside the 414 | convex hull of known pixel values. 415 | Default is 0, Has no effect for 'nearest'. 
416 | :return: the image with missing values interpolated 417 | """ 418 | assert len(image.shape) == 2, "should pass a 2D image" 419 | h, w = image.shape[:2] 420 | xx, yy = np.meshgrid(np.arange(w), np.arange(h)) 421 | 422 | known_x = xx[~mask] 423 | known_y = yy[~mask] 424 | known_v = image[~mask] 425 | missing_x = xx[mask] 426 | missing_y = yy[mask] 427 | 428 | interp_values = interpolate.griddata( 429 | (known_x, known_y), known_v, (missing_x, missing_y), 430 | method=method, fill_value=fill_value 431 | ) 432 | 433 | interp_image = image.copy() 434 | interp_image[missing_y, missing_x] = interp_values 435 | 436 | return interp_image --------------------------------------------------------------------------------
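A minimal usage sketch for the hole-filling helper interpolate_missing_pixels defined at the end of utils/frame_utils.py above. The toy depth array, the hole region, and running from the repository root (so that the utils package is importable) are illustrative assumptions, not taken from the repository:

    import numpy as np
    from utils.frame_utils import interpolate_missing_pixels

    # toy raw depth map in meters with a rectangular hole (0 marks missing measurements)
    depth = np.random.uniform(0.5, 2.0, size=(360, 640)).astype(np.float32)
    depth[100:140, 300:360] = 0.0

    missing = depth <= 0  # True where the sensor returned no value
    filled = interpolate_missing_pixels(depth, missing, method="nearest")

    # every previously-missing pixel now carries the value of its nearest valid neighbour
    assert np.all(filled[missing] > 0)

With method="nearest" the fill_value argument has no effect (as noted in the docstring); switching to "linear" or "cubic" interpolates smoothly inside the convex hull of valid pixels and uses fill_value outside it.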