├── config ├── loss │ ├── mapping │ │ ├── l1.yaml │ │ ├── l2.yaml │ │ └── huber.yaml │ ├── cc.yaml │ ├── dynamic_area.yaml │ ├── flow_3d.yaml │ ├── tracking_3d.yaml │ └── shape.yaml ├── model │ ├── intrinsics │ │ ├── model.yaml │ │ ├── ground_truth.yaml │ │ ├── regressed.yaml │ │ └── softmin.yaml │ ├── extrinsics │ │ ├── regressed.yaml │ │ ├── procrustes_ransac.yaml │ │ └── procrustes_flow.yaml │ └── backbone │ │ ├── explicit_depth.yaml │ │ ├── midas.yaml │ │ ├── unidepth.yaml │ │ └── nvds_unidepth.yaml ├── flow │ └── gmflow.yaml ├── visualizer │ ├── summary.yaml │ └── trajectory.yaml ├── tracking │ └── cotracker.yaml ├── dataset │ ├── fpha.yaml │ ├── h2o.yaml │ ├── egopat3d.yaml │ ├── pov_surgery.yaml │ ├── epic_kitchen.yaml │ ├── arctic.yaml │ └── hoi4d.yaml ├── datagen_egopat3d.yaml ├── pretrain.yaml ├── datagen_arctic.yaml ├── datagen_fpha.yaml ├── datagen_h2o.yaml ├── datagen_hoi4d.yaml ├── pretrain_eval_h2o.yaml ├── pretrain_eval_hoi4d.yaml ├── pretrain_eval_arctic.yaml ├── pretrain_eval_pov_surgery.yaml ├── datagen_pov_surgery.yaml └── datagen_epic_kitchen.yaml ├── egomono4d ├── repo │ └── gmflow │ │ ├── gmflow │ │ ├── __init__.py │ │ ├── position.py │ │ ├── utils.py │ │ ├── geometry.py │ │ ├── trident_conv.py │ │ └── matching.py │ │ ├── demo │ │ └── davis_breakdance-flare │ │ │ ├── 00000.jpg │ │ │ ├── 00001.jpg │ │ │ └── 00002.jpg │ │ ├── data │ │ └── __init__.py │ │ ├── loss.py │ │ ├── utils │ │ ├── misc.py │ │ ├── utils.py │ │ ├── logger.py │ │ └── dist_utils.py │ │ ├── scripts │ │ ├── submission.sh │ │ ├── demo.sh │ │ ├── evaluate.sh │ │ └── train_gmflow.sh │ │ └── .gitignore ├── model │ ├── backbone │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── transformer.py │ │ ├── backbone.py │ │ ├── __init__.py │ │ └── backbone_explicit_depth.py │ ├── model_pretrain_cfg.py │ ├── intrinsics │ │ ├── common.py │ │ ├── __init__.py │ │ ├── intrinsics.py │ │ ├── intrinsics_ground_truth.py │ │ ├── intrinsics_regressed.py │ │ └── intrinsics_model.py │ └── extrinsics │ │ ├── __init__.py │ │ ├── extrinsics.py │ │ ├── extrinsics_procrustes_ransac.py │ │ ├── extrinsics_procrustes_flow.py │ │ └── extrinsics_regressed.py ├── visualization │ ├── drawing │ │ ├── __init__.py │ │ ├── coordinate_conversion.py │ │ ├── types.py │ │ ├── points.py │ │ └── lines.py │ ├── __init__.py │ ├── depth.py │ ├── color.py │ └── visualizer.py ├── eval │ ├── __init__.py │ ├── eval_depth.py │ ├── eval_extrinsic.py │ └── eval_pointcloud.py ├── dataset │ ├── data_module_pretrain_cfg.py │ ├── dataset_merged.py │ ├── types.py │ ├── __init__.py │ └── data_module_pretrain.py ├── loss │ ├── mapping │ │ ├── __init__.py │ │ ├── mapping_l1.py │ │ ├── mapping_l2.py │ │ ├── mapping_huber.py │ │ └── mapping.py │ ├── __init__.py │ ├── loss_cc.py │ ├── loss.py │ ├── loss_shape.py │ └── loss_dynamic_area.py ├── frame_sampler │ ├── __init__.py │ ├── frame_sampler.py │ └── frame_sampler_pretrain.py ├── config │ ├── pretrain.py │ ├── tools.py │ └── common.py ├── misc │ ├── nn_module_tools.py │ ├── ate.py │ ├── local_logger.py │ ├── disk_cache.py │ ├── config_tools.py │ ├── wandb_tools.py │ ├── image_io.py │ ├── common_training_setup.py │ ├── depth.py │ ├── fly.py │ └── data_util.py ├── flow │ ├── common.py │ ├── __init__.py │ └── flow_predictor_gmflow.py ├── utils.py ├── tracking │ ├── track_predictor.py │ └── track_predictor_cotracker.py └── datagen.py ├── lightning_logs ├── version_2 │ └── hparams.yaml ├── version_3 │ └── hparams.yaml └── version_4 │ └── hparams.yaml ├── cache.zip ├── assets ├── teaser.png └── vis-result.png ├── examples ├── 
example_hoi4d │ ├── 00140.jpg │ ├── 00144.jpg │ ├── 00148.jpg │ ├── 00152.jpg │ ├── 00156.jpg │ ├── 00160.jpg │ ├── 00164.jpg │ ├── 00168.jpg │ ├── 00172.jpg │ ├── 00176.jpg │ ├── 00180.jpg │ ├── 00184.jpg │ ├── 00188.jpg │ ├── 00192.jpg │ ├── 00196.jpg │ ├── 00200.jpg │ ├── 00204.jpg │ ├── 00208.jpg │ ├── 00212.jpg │ ├── 00216.jpg │ ├── 00220.jpg │ ├── 00224.jpg │ ├── 00228.jpg │ ├── 00232.jpg │ ├── 00236.jpg │ ├── 00240.jpg │ └── 00244.jpg └── example_epic_kitchen │ ├── frame_0000015420.jpg │ ├── frame_0000015421.jpg │ ├── frame_0000015422.jpg │ ├── frame_0000015423.jpg │ ├── frame_0000015424.jpg │ ├── frame_0000015425.jpg │ ├── frame_0000015426.jpg │ ├── frame_0000015427.jpg │ ├── frame_0000015428.jpg │ ├── frame_0000015429.jpg │ ├── frame_0000015430.jpg │ ├── frame_0000015431.jpg │ ├── frame_0000015432.jpg │ ├── frame_0000015433.jpg │ ├── frame_0000015434.jpg │ ├── frame_0000015435.jpg │ ├── frame_0000015436.jpg │ ├── frame_0000015437.jpg │ ├── frame_0000015438.jpg │ ├── frame_0000015439.jpg │ ├── frame_0000015440.jpg │ ├── frame_0000015441.jpg │ ├── frame_0000015442.jpg │ ├── frame_0000015443.jpg │ ├── frame_0000015444.jpg │ ├── frame_0000015445.jpg │ ├── frame_0000015446.jpg │ ├── frame_0000015447.jpg │ ├── frame_0000015448.jpg │ ├── frame_0000015449.jpg │ ├── frame_0000015450.jpg │ ├── frame_0000015451.jpg │ ├── frame_0000015452.jpg │ ├── frame_0000015453.jpg │ ├── frame_0000015454.jpg │ ├── frame_0000015455.jpg │ ├── frame_0000015456.jpg │ ├── frame_0000015457.jpg │ ├── frame_0000015458.jpg │ ├── frame_0000015459.jpg │ └── frame_0000015460.jpg ├── .gitmodules ├── pyproject.toml ├── LICENSE └── .gitignore /config/loss/mapping/l1.yaml: -------------------------------------------------------------------------------- 1 | name: l1 2 | -------------------------------------------------------------------------------- /config/loss/mapping/l2.yaml: -------------------------------------------------------------------------------- 1 | name: l2 2 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/gmflow/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /egomono4d/model/backbone/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightning_logs/version_2/hparams.yaml: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /lightning_logs/version_3/hparams.yaml: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /lightning_logs/version_4/hparams.yaml: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /config/model/intrinsics/model.yaml: -------------------------------------------------------------------------------- 1 | name: model 2 | -------------------------------------------------------------------------------- /config/model/extrinsics/regressed.yaml: -------------------------------------------------------------------------------- 1 | name: regressed 2 | 
-------------------------------------------------------------------------------- /config/loss/cc.yaml: -------------------------------------------------------------------------------- 1 | cc: 2 | weight: 10.0 3 | enable_after: 0 -------------------------------------------------------------------------------- /config/flow/gmflow.yaml: -------------------------------------------------------------------------------- 1 | name: gmflow 2 | 3 | cache_dir: null 4 | -------------------------------------------------------------------------------- /config/loss/mapping/huber.yaml: -------------------------------------------------------------------------------- 1 | name: huber 2 | 3 | delta: 0.01 4 | -------------------------------------------------------------------------------- /config/model/intrinsics/ground_truth.yaml: -------------------------------------------------------------------------------- 1 | name: ground_truth 2 | -------------------------------------------------------------------------------- /config/visualizer/summary.yaml: -------------------------------------------------------------------------------- 1 | summary: 2 | num_vis_frames: 5 3 | -------------------------------------------------------------------------------- /cache.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/cache.zip -------------------------------------------------------------------------------- /config/loss/dynamic_area.yaml: -------------------------------------------------------------------------------- 1 | dynamic_area: 2 | weight: 10.0 3 | enable_after: 0 -------------------------------------------------------------------------------- /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/assets/teaser.png -------------------------------------------------------------------------------- /assets/vis-result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/assets/vis-result.png -------------------------------------------------------------------------------- /config/loss/flow_3d.yaml: -------------------------------------------------------------------------------- 1 | flow_3d: 2 | weight: 10.0 3 | enable_after: 0 4 | # upper_threshold: 0.5 -------------------------------------------------------------------------------- /config/visualizer/trajectory.yaml: -------------------------------------------------------------------------------- 1 | trajectory: 2 | generate_plot: true 3 | ate_save_root: null 4 | -------------------------------------------------------------------------------- /config/loss/tracking_3d.yaml: -------------------------------------------------------------------------------- 1 | tracking_3d: 2 | weight: 10.0 3 | enable_after: 0 4 | # upper_threshold: 0.5 5 | -------------------------------------------------------------------------------- /config/model/extrinsics/procrustes_ransac.yaml: -------------------------------------------------------------------------------- 1 | name: procrustes_ransac 2 | 3 | max_iter: 5 4 | num_points: 4800 5 | -------------------------------------------------------------------------------- /config/tracking/cotracker.yaml: -------------------------------------------------------------------------------- 1 | name: cotracker 2 | 3 | grid_size: 35 4 | similarity_threshold: 
0.75 5 | cache_dir: null -------------------------------------------------------------------------------- /examples/example_hoi4d/00140.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00140.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00144.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00144.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00148.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00148.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00152.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00152.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00156.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00156.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00160.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00160.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00164.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00164.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00168.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00168.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00172.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00172.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00176.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00176.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00180.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00180.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00184.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00184.jpg 
-------------------------------------------------------------------------------- /examples/example_hoi4d/00188.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00188.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00192.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00192.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00196.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00196.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00200.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00200.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00204.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00204.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00208.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00208.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00212.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00212.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00216.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00216.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00220.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00220.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00224.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00224.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00228.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00228.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00232.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00232.jpg 
-------------------------------------------------------------------------------- /examples/example_hoi4d/00236.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00236.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00240.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00240.jpg -------------------------------------------------------------------------------- /examples/example_hoi4d/00244.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_hoi4d/00244.jpg -------------------------------------------------------------------------------- /config/model/backbone/explicit_depth.yaml: -------------------------------------------------------------------------------- 1 | name: explicit_depth 2 | 3 | initial_depth: 0.1 4 | weight_sensitivity: 100.0 5 | -------------------------------------------------------------------------------- /config/model/extrinsics/procrustes_flow.yaml: -------------------------------------------------------------------------------- 1 | name: procrustes_flow 2 | 3 | num_points: 1000 4 | randomize_points: false 5 | -------------------------------------------------------------------------------- /egomono4d/visualization/drawing/__init__.py: -------------------------------------------------------------------------------- 1 | from .lines import draw_lines as draw_lines 2 | from .points import draw_points as draw_points 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "flowcam/third_party/gmflow"] 2 | path = flowmap/third_party/gmflow 3 | url = https://github.com/haofeixu/gmflow.git 4 | -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015420.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015420.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015421.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015421.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015422.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015422.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015423.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015423.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015424.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015424.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015425.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015425.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015426.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015426.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015427.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015427.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015428.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015428.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015429.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015429.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015430.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015430.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015431.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015431.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015432.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015432.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015433.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015433.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015434.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015434.jpg 
-------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015435.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015435.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015436.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015436.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015437.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015437.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015438.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015438.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015439.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015439.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015440.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015440.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015441.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015441.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015442.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015442.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015443.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015443.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015444.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015444.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015445.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015445.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015446.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015446.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015447.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015447.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015448.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015448.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015449.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015449.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015450.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015450.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015451.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015451.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015452.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015452.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015453.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015453.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015454.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015454.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015455.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015455.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015456.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015456.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015457.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015457.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015458.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015458.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015459.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015459.jpg -------------------------------------------------------------------------------- /examples/example_epic_kitchen/frame_0000015460.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/examples/example_epic_kitchen/frame_0000015460.jpg -------------------------------------------------------------------------------- /config/loss/shape.yaml: -------------------------------------------------------------------------------- 1 | shape: 2 | weight: 10.0 3 | enable_after: 0 4 | dynamic_coef: 1.0 5 | decay_end_epochs: -1 # [-1] close 6 | decay_low_weight: 0.0 -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/demo/davis_breakdance-flare/00000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/egomono4d/repo/gmflow/demo/davis_breakdance-flare/00000.jpg -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/demo/davis_breakdance-flare/00001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/egomono4d/repo/gmflow/demo/davis_breakdance-flare/00001.jpg -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/demo/davis_breakdance-flare/00002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelyuancb/egomono4d/HEAD/egomono4d/repo/gmflow/demo/davis_breakdance-flare/00002.jpg -------------------------------------------------------------------------------- /egomono4d/model/model_pretrain_cfg.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | @dataclass 4 | class ModelWrapperPretrainCfg: 5 | lr: float = 5e-5 6 | cache_track: bool = "" 7 | 8 | -------------------------------------------------------------------------------- /config/model/backbone/midas.yaml: -------------------------------------------------------------------------------- 1 | name: midas 2 | 3 | pretrained: true 4 | weight_sensitivity: null 5 | mapping: original 6 | model: MiDaS_small 7 | local_dir: 
/home/ycb/hub/torch/hub/intel-isl_MiDaS_master -------------------------------------------------------------------------------- /config/model/intrinsics/regressed.yaml: -------------------------------------------------------------------------------- 1 | name: regressed 2 | 3 | # This is roughly in the middle of the focal length distribution for Tanks & Temples, 4 | # LLFF, and MipNeRF 360. 5 | initial_focal_length: 0.85 6 | -------------------------------------------------------------------------------- /config/model/backbone/unidepth.yaml: -------------------------------------------------------------------------------- 1 | name: unidepth 2 | 3 | cache_dir: null 4 | estimator: unidepth_v2_large # unidepth_v2_[large, small] 5 | finetune_head: true # whether to only finetune dpt head of depth-anything-v2 -------------------------------------------------------------------------------- /egomono4d/eval/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_depth import eval_depth_conductor 2 | from .eval_extrinsic import eval_extrinsic_conductor 3 | from .eval_track import eval_track_conductor 4 | from .eval_track_hoi import eval_track_hoi_conductor 5 | from .eval_pointcloud import eval_pointcloud_conductor -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import build_train_dataset 2 | from .datasets import (FlyingChairs, 3 | FlyingThings3D, 4 | MpiSintel, 5 | KITTI, 6 | HD1K, 7 | ) 8 | -------------------------------------------------------------------------------- /egomono4d/dataset/data_module_pretrain_cfg.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from dataclasses import dataclass 3 | 4 | @dataclass 5 | class DataLoaderStageCfg: 6 | batch_size: int = 1 7 | num_workers: int = 1 8 | persistent_workers: bool = True 9 | seed: Optional[int] = None 10 | 11 | 12 | @dataclass 13 | class DataModulePretrainCfg: 14 | train: DataLoaderStageCfg 15 | val: DataLoaderStageCfg -------------------------------------------------------------------------------- /config/model/intrinsics/softmin.yaml: -------------------------------------------------------------------------------- 1 | name: softmin 2 | 3 | num_procrustes_points: 8192 4 | 5 | min_focal_length: 0.5 6 | max_focal_length: 2.0 7 | num_candidates: 60 8 | 9 | # If this is non-null, the intrinsics will be regressed after the specified number of 10 | # steps. The initial regressed value will be the mean of the last n non-regressed 11 | # intrinsics estimates, where n is window. 
12 | regression: 13 | after_step: 1000 14 | window: 100 15 | -------------------------------------------------------------------------------- /config/model/backbone/nvds_unidepth.yaml: -------------------------------------------------------------------------------- 1 | name: nvds_unidepth 2 | 3 | cache_dir: null 4 | estimator: unidepth_v2_large # unidepth_v2_[large, small] 5 | finetune_head: true # whether to only finetune dpt head of depth-anything-v2 6 | 7 | unet_num: 1 8 | unet_channels: [256, 256, 384, 384] # torch.Size([20, 42, 56, 1024]) 9 | unet_kernel_size: 3 10 | unet_groups: 1 11 | 12 | transformer_depth: 2 13 | transformer_heads: 4 14 | transformer_dim_head: 64 15 | transformer_mlp_dim: 256 16 | -------------------------------------------------------------------------------- /egomono4d/loss/mapping/__init__.py: -------------------------------------------------------------------------------- 1 | from .mapping import Mapping 2 | from .mapping_huber import MappingHuber, MappingHuberCfg 3 | from .mapping_l1 import MappingL1, MappingL1Cfg 4 | from .mapping_l2 import MappingL2, MappingL2Cfg 5 | 6 | MAPPINGS = { 7 | "huber": MappingHuber, 8 | "l1": MappingL1, 9 | "l2": MappingL2, 10 | } 11 | 12 | MappingCfg = MappingHuberCfg | MappingL1Cfg | MappingL2Cfg 13 | 14 | 15 | def get_mapping(cfg: MappingCfg) -> Mapping: 16 | return MAPPINGS[cfg.name](cfg) 17 | -------------------------------------------------------------------------------- /egomono4d/loss/mapping/mapping_l1.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Literal 3 | 4 | from jaxtyping import Float 5 | from torch import Tensor 6 | 7 | from .mapping import Mapping 8 | 9 | 10 | @dataclass 11 | class MappingL1Cfg: 12 | name: Literal["l1"] 13 | 14 | 15 | class MappingL1(Mapping[MappingL1Cfg]): 16 | def forward_undistorted( 17 | self, 18 | delta: Float[Tensor, "*batch 2"], 19 | ) -> Float[Tensor, " *batch"]: 20 | return delta.norm(dim=-1) 21 | -------------------------------------------------------------------------------- /egomono4d/frame_sampler/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from .frame_sampler import FrameSampler 4 | from .frame_sampler_pretrain import FrameSamplerPretrainNeighbor, FrameSamplerPretrainInterval 5 | 6 | FRAME_SAMPLER = { 7 | "pretrain_neighbor": FrameSamplerPretrainNeighbor, # pick num_frames neighborhood 8 | "pretrain_interval": FrameSamplerPretrainInterval, # pick random index (with random interval) 9 | } 10 | 11 | 12 | def get_frame_sampler(fs_name, num_frames, stage) -> FrameSampler[Any]: 13 | return FRAME_SAMPLER[fs_name](num_frames, stage) 14 | -------------------------------------------------------------------------------- /egomono4d/loss/mapping/mapping_l2.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Literal 3 | 4 | from jaxtyping import Float 5 | from torch import Tensor 6 | 7 | from .mapping import Mapping 8 | 9 | 10 | @dataclass 11 | class MappingL2Cfg: 12 | name: Literal["l2"] 13 | 14 | 15 | class MappingL2(Mapping[MappingL2Cfg]): 16 | def forward_undistorted( 17 | self, 18 | delta: Float[Tensor, "*batch 2"], 19 | ) -> Float[Tensor, " *batch"]: 20 | # Multiply by 0.5 to match torch.nn.functional.huber_loss. 
21 | return 0.5 * (delta * delta).sum(dim=-1) 22 | -------------------------------------------------------------------------------- /config/dataset/fpha.yaml: -------------------------------------------------------------------------------- 1 | fpha: 2 | # Common configuration items (all datasets have these) 3 | scene: null 4 | cache_dir: null 5 | resize_shape: null 6 | patch_size: null 7 | num_frames: null 8 | all_frames: false 9 | use_gt_depth: false 10 | 11 | mask_estimation: ['egohos'] 12 | mask_flow_model: null 13 | mask_binary_open_value: null 14 | 15 | frame_sampler: pretrain_interval 16 | frame_max_interval: 4 17 | 18 | # Dataset-specific configuration items 19 | clip_frame: 20 20 | original_base_root: ./cache/original_datasets/FPHA 21 | pre_save_root: ./cache/processed_datasets -------------------------------------------------------------------------------- /egomono4d/config/pretrain.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from ..misc.data_util import PreProcessingCfg 4 | from ..dataset.data_module_pretrain_cfg import DataModulePretrainCfg 5 | from ..model.model_pretrain_cfg import ModelWrapperPretrainCfg 6 | from .common import CommonCfg 7 | 8 | 9 | @dataclass 10 | class StageCfg: 11 | batch_size: int = 1 12 | num_workers: int = 1 13 | 14 | 15 | @dataclass 16 | class PretrainCfg(CommonCfg): 17 | model_wrapper: ModelWrapperPretrainCfg = None 18 | data_module: DataModulePretrainCfg = None 19 | preprocess: PreProcessingCfg = None 20 | -------------------------------------------------------------------------------- /config/dataset/h2o.yaml: -------------------------------------------------------------------------------- 1 | h2o: 2 | # Common configuration items (all datasets have these) 3 | scene: null 4 | cache_dir: null 5 | resize_shape: null 6 | patch_size: null 7 | num_frames: null 8 | all_frames: false 9 | use_gt_depth: false 10 | 11 | mask_estimation: ['egohos'] 12 | mask_flow_model: null 13 | mask_binary_open_value: null 14 | 15 | frame_sampler: pretrain_interval 16 | frame_max_interval: 4 17 | 18 | # Dataset-specific configuration items 19 | clip_frame: 20 20 | original_base_root: ./cache/original_datasets/H2O/downloads 21 | pre_save_root: ./cache/processed_datasets -------------------------------------------------------------------------------- /egomono4d/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .visualizer import Visualizer 2 | from .visualizer_summary import VisualizerSummary, VisualizerSummaryCfg 3 | from .visualizer_trajectory import VisualizerTrajectory, VisualizerTrajectoryCfg 4 | from .visualizer_cotracker import VisualizerCoTracker 5 | 6 | VISUALIZERS = { 7 | "summary": VisualizerSummary, 8 | "trajectory": VisualizerTrajectory, 9 | } 10 | 11 | VisualizerCfg = VisualizerSummaryCfg | VisualizerTrajectoryCfg 12 | 13 | 14 | def get_visualizers(cfgs: list[VisualizerCfg]) -> list[Visualizer]: 15 | return [VISUALIZERS[cfg.name](cfg) for cfg in cfgs] 16 | -------------------------------------------------------------------------------- /config/dataset/egopat3d.yaml: -------------------------------------------------------------------------------- 1 | egopat3d: 2 | # Common configuration items (all datasets have these) 3 | scene: null 4 | cache_dir: null 5 | resize_shape: null 6 | patch_size: null 7 | num_frames: null 8 | all_frames: false 9 | use_gt_depth: false 10 | 11 | mask_estimation: ['egohos'] 12 | 
mask_flow_model: null 13 | mask_binary_open_value: null 14 | 15 | frame_sampler: pretrain_interval 16 | frame_max_interval: 4 17 | 18 | # Dataset-specific configuration items 19 | clip_frame: 20 20 | original_base_root: ./cache/original_datasets/EgoPAT3D 21 | pre_save_root: ./cache/processed_datasets -------------------------------------------------------------------------------- /config/dataset/pov_surgery.yaml: -------------------------------------------------------------------------------- 1 | pov_surgery: 2 | # Common configuration items (all datasets have these) 3 | scene: null 4 | cache_dir: null 5 | resize_shape: null 6 | patch_size: null 7 | num_frames: null 8 | all_frames: false 9 | use_gt_depth: false 10 | 11 | mask_estimation: ['egohos'] 12 | mask_flow_model: null 13 | mask_binary_open_value: null 14 | 15 | frame_sampler: pretrain_interval 16 | frame_max_interval: 4 17 | 18 | # Dataset-specific configuration items 19 | clip_frame: 40 20 | original_base_root: ./cache/original_datasets/POV_Surgery/POV_Surgery_data 21 | pre_save_root: ./cache/processed_datasets -------------------------------------------------------------------------------- /egomono4d/misc/nn_module_tools.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | def convert_to_buffer(module: nn.Module, persistent: bool = True): 5 | # Recurse over child modules. 6 | for name, child in list(module.named_children()): 7 | convert_to_buffer(child, persistent) 8 | 9 | # Also re-save buffers to change persistence. 10 | for name, parameter_or_buffer in ( 11 | *module.named_parameters(recurse=False), 12 | *module.named_buffers(recurse=False), 13 | ): 14 | value = parameter_or_buffer.detach().clone() 15 | delattr(module, name) 16 | module.register_buffer(name, value, persistent=persistent) 17 | -------------------------------------------------------------------------------- /egomono4d/flow/common.py: -------------------------------------------------------------------------------- 1 | from einops import rearrange 2 | from jaxtyping import Float 3 | from torch import Tensor 4 | 5 | 6 | def split_videos( 7 | videos: Float[Tensor, "batch frame 3 height width"], 8 | ) -> tuple[ 9 | Float[Tensor, "batch*(frame-1) 3 height width"], # source (flattened batch dims) 10 | Float[Tensor, "batch*(frame-1) 3 height width"], # target (flattened batch dims) 11 | int, # batch 12 | int, # frame 13 | ]: 14 | b, f, _, _, _ = videos.shape 15 | return ( 16 | rearrange(videos[:, :-1], "b f c h w -> (b f) c h w"), 17 | rearrange(videos[:, 1:], "b f c h w -> (b f) c h w"), 18 | b, 19 | f, 20 | ) 21 | -------------------------------------------------------------------------------- /config/dataset/epic_kitchen.yaml: -------------------------------------------------------------------------------- 1 | epic_kitchen: 2 | # Common configuration items (all datasets have these) 3 | scene: null 4 | cache_dir: null 5 | resize_shape: null 6 | patch_size: null 7 | num_frames: null 8 | all_frames: false 9 | use_gt_depth: false 10 | 11 | mask_estimation: null 12 | mask_flow_model: null 13 | mask_binary_open_value: null 14 | 15 | frame_sampler: pretrain_interval 16 | frame_max_interval: 4 17 | 18 | # Dataset-specific configuration items 19 | max_clip_per_video: 1200 20 | clip_frame: 20 21 | original_base_root: ./cache/original_datasets/EpicKitchen/EPIC-KITCHENS 22 | intrinsic_root: none 23 | pre_save_root: ./cache/processed_datasets 
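As a minimal, hypothetical sketch (not a file from this repository — the project's own config loading under egomono4d/config/tools.py may differ), the dataset configs above can be read and queried with OmegaConf; each one shares a common block of keys (scene, cache_dir, resize_shape, patch_size, num_frames, mask and frame-sampler settings) plus a few dataset-specific keys:

from omegaconf import OmegaConf  # assumed dependency, used here only for illustration

# Load one of the dataset configs shown above (path relative to the repository root).
cfg = OmegaConf.load("config/dataset/epic_kitchen.yaml")
ek = cfg.epic_kitchen

# Keys common to every dataset config.
print(ek.frame_sampler, ek.frame_max_interval)  # pretrain_interval 4

# Dataset-specific keys.
print(ek.clip_frame, ek.pre_save_root)          # 20 ./cache/processed_datasets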
-------------------------------------------------------------------------------- /config/dataset/arctic.yaml: -------------------------------------------------------------------------------- 1 | arctic: 2 | # Common configuration items (all datasets have these) 3 | scene: null 4 | cache_dir: null 5 | resize_shape: null 6 | patch_size: null 7 | num_frames: null 8 | all_frames: false 9 | use_gt_depth: false 10 | 11 | mask_estimation: ['egohos'] 12 | mask_flow_model: null 13 | mask_binary_open_value: null 14 | 15 | frame_sampler: pretrain_interval 16 | frame_max_interval: 4 17 | 18 | # Dataset-specific configuration items 19 | clip_frame: 40 20 | original_data_root: ./cache/original_datasets/ARCTIC/arctic/data/ 21 | original_render_root: ./cache/original_datasets/ARCTIC/arctic/render_out 22 | pre_save_root: ./cache/processed_datasets -------------------------------------------------------------------------------- /egomono4d/model/intrinsics/common.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from jaxtyping import Float 3 | from torch import Tensor 4 | 5 | 6 | def focal_lengths_to_intrinsics( 7 | focal_lengths: Float[Tensor, " *batch"], 8 | image_shape: tuple[int, int], 9 | ) -> Float[Tensor, "*batch 3 3"]: 10 | device = focal_lengths.device 11 | h, w = image_shape 12 | focal_lengths = focal_lengths * (h * w) ** 0.5 13 | 14 | intrinsics = torch.eye(3, dtype=torch.float32, device=device) 15 | intrinsics[:2, 2] = 0.5 16 | intrinsics = intrinsics.broadcast_to((*focal_lengths.shape, 3, 3)).contiguous() 17 | intrinsics[..., 0, 0] = focal_lengths / w # fx 18 | intrinsics[..., 1, 1] = focal_lengths / h # fy 19 | 20 | return intrinsics 21 | -------------------------------------------------------------------------------- /egomono4d/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .loss import Loss 2 | from .loss_dynamic_area import LossDynamicArea, LossDynamicAreaCfg 3 | from .loss_shape import LossShape, LossShapeCfg #, loss_shape_func 4 | from .loss_flow_3d import LossFlow3D, LossFlow3DCfg #, loss_flow_3d_func 5 | from .loss_tracking_3d import LossTracking3D, LossTracking3DCfg #, loss_tracking_3d_func 6 | from .loss_cc import LossCC, LossCCCfg 7 | 8 | LOSSES = { 9 | "dynamic_area": LossDynamicArea, 10 | "tracking_3d": LossTracking3D, 11 | "flow_3d": LossFlow3D, 12 | "shape": LossShape, 13 | "cc": LossCC, 14 | } 15 | 16 | LossCfg = LossDynamicAreaCfg | LossTracking3DCfg | LossCCCfg | LossShapeCfg | LossFlow3DCfg 17 | 18 | 19 | def get_losses(cfgs: list[LossCfg]) -> list[Loss]: 20 | return [LOSSES[cfg.name](cfg) for cfg in cfgs] 21 | -------------------------------------------------------------------------------- /egomono4d/model/extrinsics/__init__.py: -------------------------------------------------------------------------------- 1 | from .extrinsics import Extrinsics 2 | from .extrinsics_procrustes_flow import ExtrinsicsProcrustesFlow, ExtrinsicsProcrustesFlowCfg 3 | from .extrinsics_regressed import ExtrinsicsRegressed, ExtrinsicsRegressedCfg 4 | from .extrinsics_procrustes_ransac import ExtrinsicsProcrustesRANSAC, ExtrinsicsProcrustesRANSACCfg 5 | 6 | EXTRINSICS = { 7 | "regressed": ExtrinsicsRegressed, 8 | "procrustes_flow": ExtrinsicsProcrustesFlow, 9 | "procrustes_ransac": ExtrinsicsProcrustesRANSAC 10 | } 11 | 12 | ExtrinsicsCfg = ExtrinsicsRegressedCfg | ExtrinsicsProcrustesFlowCfg | ExtrinsicsProcrustesRANSACCfg 13 | 14 | 15 | def get_extrinsics( 16 | cfg: ExtrinsicsCfg, 
17 | num_frames: int | None, 18 | ) -> Extrinsics: 19 | return EXTRINSICS[cfg.name](cfg, num_frames) 20 | -------------------------------------------------------------------------------- /egomono4d/model/intrinsics/__init__.py: -------------------------------------------------------------------------------- 1 | from .intrinsics import Intrinsics 2 | from .intrinsics_ground_truth import IntrinsicsGroundTruth, IntrinsicsGroundTruthCfg 3 | from .intrinsics_regressed import IntrinsicsRegressed, IntrinsicsRegressedCfg 4 | from .intrinsics_softmin import IntrinsicsSoftmin, IntrinsicsSoftminCfg 5 | from .intrinsics_model import IntrinsicsModel, IntrinsicsModelCfg 6 | 7 | INTRINSICS = { 8 | "ground_truth": IntrinsicsGroundTruth, 9 | "regressed": IntrinsicsRegressed, 10 | "softmin": IntrinsicsSoftmin, 11 | "model": IntrinsicsModel 12 | } 13 | 14 | IntrinsicsCfg = IntrinsicsRegressedCfg | IntrinsicsGroundTruthCfg | IntrinsicsSoftminCfg | \ 15 | IntrinsicsModelCfg 16 | 17 | 18 | def get_intrinsics(cfg: IntrinsicsCfg) -> Intrinsics: 19 | return INTRINSICS[cfg.name](cfg) 20 | -------------------------------------------------------------------------------- /egomono4d/model/intrinsics/intrinsics.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Generic, TypeVar 3 | 4 | from jaxtyping import Float 5 | from torch import Tensor, nn 6 | 7 | from ...dataset.types import Batch 8 | from ...flow.flow_predictor import Flows 9 | from ...tracking.track_predictor import Tracks 10 | from ..backbone.backbone import BackboneOutput 11 | 12 | T = TypeVar("T") 13 | 14 | 15 | class Intrinsics(nn.Module, ABC, Generic[T]): 16 | cfg: T 17 | 18 | def __init__(self, cfg: T) -> None: 19 | super().__init__() 20 | self.cfg = cfg 21 | 22 | @abstractmethod 23 | def forward( 24 | self, 25 | batch: Batch, 26 | flows: Flows | list[Tracks], 27 | backbone_output: BackboneOutput, 28 | global_step: int, 29 | ) -> Float[Tensor, "batch frame 3 3"]: 30 | pass 31 | -------------------------------------------------------------------------------- /egomono4d/visualization/depth.py: -------------------------------------------------------------------------------- 1 | from jaxtyping import Float 2 | from torch import Tensor 3 | 4 | from .color import apply_color_map_to_image 5 | 6 | 7 | def color_map_depth( 8 | depth: Float[Tensor, "batch height width"], 9 | cmap: str = "inferno", 10 | invert: bool = True, 11 | log_first: bool = False 12 | ) -> Float[Tensor, "batch 3 height width"]: 13 | mask = (depth == 0) 14 | if log_first is True: 15 | # For depth estimation, we first take the log for convenient visualization. 16 | depth = depth.log() 17 | # Normalize the depth. 
18 | far = depth.max() 19 | depth = depth + mask * 1e9 20 | near = depth.min() 21 | depth = (depth - near) / (far - near) 22 | depth = depth.clip(min=0, max=1) 23 | depth[mask] = 0 24 | if invert: 25 | depth = 1 - depth 26 | return apply_color_map_to_image(depth, cmap) 27 | -------------------------------------------------------------------------------- /egomono4d/misc/ate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from jaxtyping import Float 3 | from scipy import spatial 4 | from torch import Tensor 5 | 6 | 7 | def compute_ate( 8 | gt: Float[Tensor, "point 3"], 9 | predicted: Float[Tensor, "point 3"], 10 | ) -> tuple[ 11 | Float[Tensor, ""], # ate 12 | Float[Tensor, "point 3"], # aligned gt 13 | Float[Tensor, "point 3"], # aligned predicted 14 | ]: 15 | aligned_gt, aligned_predicted, _ = spatial.procrustes( 16 | gt.detach().cpu().numpy(), 17 | predicted.cpu().numpy(), 18 | ) 19 | aligned_gt = torch.tensor(aligned_gt, dtype=torch.float32, device=gt.device) 20 | aligned_predicted = torch.tensor( 21 | aligned_predicted, dtype=torch.float32, device=predicted.device 22 | ) 23 | 24 | ate = ((aligned_gt - aligned_predicted) ** 2).mean() ** 0.5 25 | return ate, aligned_gt, aligned_predicted 26 | -------------------------------------------------------------------------------- /egomono4d/model/intrinsics/intrinsics_ground_truth.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Literal 3 | 4 | from jaxtyping import Float 5 | from torch import Tensor 6 | 7 | from ...dataset.types import Batch 8 | from ...flow.flow_predictor import Flows 9 | from ..backbone.backbone import BackboneOutput 10 | from .intrinsics import Intrinsics 11 | from ...tracking.track_predictor import Tracks 12 | 13 | 14 | @dataclass 15 | class IntrinsicsGroundTruthCfg: 16 | name: Literal["ground_truth"] 17 | 18 | 19 | class IntrinsicsGroundTruth(Intrinsics[IntrinsicsGroundTruthCfg]): 20 | def forward( 21 | self, 22 | batch: Batch, 23 | flows: Flows | list[Tracks], 24 | backbone_output: BackboneOutput, 25 | global_step: int, 26 | ) -> Float[Tensor, "batch frame 3 3"]: 27 | # Just return the ground-truth intrinsics. 28 | return batch.intrinsics 29 | -------------------------------------------------------------------------------- /egomono4d/visualization/color.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from einops import rearrange 3 | from jaxtyping import Float 4 | from matplotlib import cm 5 | from torch import Tensor 6 | 7 | 8 | def apply_color_map( 9 | x: Float[Tensor, " *batch"], 10 | color_map: str = "inferno", 11 | ) -> Float[Tensor, "*batch 3"]: 12 | cmap = cm.get_cmap(color_map) 13 | 14 | # Convert to NumPy so that Matplotlib color maps can be used. 15 | mapped = cmap(x.detach().clip(min=0, max=1).cpu().numpy())[..., :3] 16 | 17 | # Convert back to the original format. 18 | return torch.tensor(mapped, device=x.device, dtype=torch.float32) 19 | 20 | 21 | def apply_color_map_to_image( 22 | image: Float[Tensor, "*batch height width"], 23 | color_map: str = "inferno", 24 | ) -> Float[Tensor, "*batch 3 height width"]: 25 | image = apply_color_map(image, color_map) 26 | return rearrange(image, "... h w c -> ... 
c h w") 27 | -------------------------------------------------------------------------------- /egomono4d/model/extrinsics/extrinsics.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Generic, TypeVar 3 | 4 | from jaxtyping import Float 5 | from torch import Tensor, nn 6 | 7 | from ...dataset.types import Batch 8 | from ...flow.flow_predictor import Flows 9 | from ..backbone.backbone import BackboneOutput 10 | 11 | T = TypeVar("T") 12 | 13 | 14 | class Extrinsics(nn.Module, ABC, Generic[T]): 15 | cfg: T 16 | num_frames: int | None 17 | 18 | def __init__(self, cfg: T, num_frames: int | None) -> None: 19 | super().__init__() 20 | self.cfg = cfg 21 | self.num_frames = num_frames 22 | 23 | @abstractmethod 24 | def forward( 25 | self, 26 | batch: Batch, 27 | flows: Flows, 28 | backbone_output: BackboneOutput, 29 | surfaces: Float[Tensor, "batch frame height width 3"], 30 | ) -> Float[Tensor, "batch frame 4 4"]: 31 | pass 32 | -------------------------------------------------------------------------------- /egomono4d/frame_sampler/frame_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Generic, TypeVar 3 | 4 | import torch 5 | from jaxtyping import Int64 6 | from torch import Tensor 7 | 8 | T = TypeVar("T") 9 | 10 | 11 | class FrameSampler(ABC, Generic[T]): 12 | """A frame sampler picks the frames that should be sampled from a dataset's video. 13 | It makes sense to break the logic for frame sampling into an interface because 14 | pre-training and fine-tuning require different frame sampling strategies (generally, 15 | whole video vs. batch of video segments of same length). 16 | """ 17 | 18 | 19 | def __init__(self, num_frames, stage) -> None: 20 | self.num_frames = num_frames 21 | self.stage = stage 22 | 23 | @abstractmethod 24 | def sample( 25 | self, 26 | num_frames_in_video: int, 27 | device: torch.device, 28 | ) -> Int64[Tensor, " frame"]: # frame indices 29 | pass 30 | -------------------------------------------------------------------------------- /egomono4d/loss/mapping/mapping_huber.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Literal 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | from jaxtyping import Float 7 | from torch import Tensor 8 | 9 | from .mapping import Mapping 10 | 11 | 12 | @dataclass 13 | class MappingHuberCfg: 14 | name: Literal["huber"] 15 | delta: float 16 | 17 | 18 | class MappingHuber(Mapping[MappingHuberCfg]): 19 | def forward_undistorted( 20 | self, 21 | delta: Float[Tensor, "*batch 2"], 22 | ) -> Float[Tensor, " *batch"]: 23 | norm = delta.norm(dim=-1) 24 | 25 | mapped = F.huber_loss( 26 | norm, 27 | torch.zeros_like(norm), 28 | reduction="none", 29 | delta=self.cfg.delta, 30 | ) 31 | 32 | # Divide by the delta so that the gradient magnitude in the linear region 33 | # matches that of a regular L1 loss. 
34 | return mapped / self.cfg.delta 35 | -------------------------------------------------------------------------------- /egomono4d/visualization/visualizer.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Generic, TypeVar 3 | 4 | from jaxtyping import Float 5 | from torch import Tensor 6 | 7 | from ..dataset.types import Batch 8 | from ..flow import Flows 9 | from ..model.model import Model, ModelOutput 10 | from ..tracking import Tracks 11 | 12 | T = TypeVar("T") 13 | 14 | 15 | class Visualizer(ABC, Generic[T]): 16 | cfg: T 17 | 18 | def __init__(self, cfg: T) -> None: 19 | super().__init__() 20 | self.cfg = cfg 21 | self.select_indices = None 22 | self.select_scenes = None 23 | 24 | @abstractmethod 25 | def visualize( 26 | self, 27 | batch: Batch, 28 | flows: Flows, 29 | tracks: list[Tracks] | None, 30 | model_output: ModelOutput, 31 | model: Model, 32 | global_step: int, 33 | current_epoch: int 34 | ) -> dict[str, Float[Tensor, "3 _ _"] | Float[Tensor, ""]]: 35 | pass 36 | -------------------------------------------------------------------------------- /egomono4d/flow/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..dataset.types import Batch 4 | from ..misc.nn_module_tools import convert_to_buffer 5 | from .flow_predictor import FlowPredictor, Flows 6 | from .flow_predictor_gmflow import FlowPredictorGMFlow, FlowPredictorGMFlowCfg 7 | 8 | FLOW_PREDICTORS = { 9 | "gmflow": FlowPredictorGMFlow, 10 | } 11 | 12 | FlowPredictorCfg = FlowPredictorGMFlowCfg 13 | 14 | 15 | def get_flow_predictor(cfg: FlowPredictorCfg) -> FlowPredictor: 16 | flow_predictor = FLOW_PREDICTORS[cfg.name](cfg) 17 | convert_to_buffer(flow_predictor, persistent=False) 18 | return flow_predictor 19 | 20 | 21 | @torch.no_grad() 22 | def compute_flows( 23 | batch: Batch, 24 | flow_shape: tuple[int, int], 25 | device: torch.device, 26 | cfg: FlowPredictorCfg, 27 | ) -> Flows: 28 | print("Precomputing optical flow.") 29 | flow_predictor = get_flow_predictor(cfg) 30 | flow_predictor.to(device) 31 | return flow_predictor.compute_bidirectional_flow(batch.to(device), flow_shape) 32 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | # Enable Pyflakes `E` and `F` codes by default. 3 | select = ["E", "F", "I"] 4 | ignore = ["F722"] # Ignore F722 for jaxtyping compatibility. 5 | 6 | # Allow autofix for all enabled rules (when `--fix`) is provided. 7 | fixable = ["A", "B", "C", "D", "E", "F", "I"] 8 | unfixable = [] 9 | 10 | # Exclude a variety of commonly ignored directories. 11 | exclude = [ 12 | ".bzr", 13 | ".direnv", 14 | ".eggs", 15 | ".git", 16 | ".hg", 17 | ".mypy_cache", 18 | ".nox", 19 | ".pants.d", 20 | ".ruff_cache", 21 | ".svn", 22 | ".tox", 23 | ".venv", 24 | "__pypackages__", 25 | "_build", 26 | "buck-out", 27 | "build", 28 | "dist", 29 | "node_modules", 30 | "venv", 31 | ] 32 | per-file-ignores = {} 33 | 34 | # Same as Black. 35 | line-length = 88 36 | 37 | # Allow unused variables when underscore-prefixed. 38 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 39 | 40 | # Assume Python 3.10. 41 | target-version = "py310" 42 | 43 | [tool.ruff.mccabe] 44 | # Unlike Flake8, default to a complexity level of 10. 
45 | max-complexity = 10 46 | -------------------------------------------------------------------------------- /config/dataset/hoi4d.yaml: -------------------------------------------------------------------------------- 1 | hoi4d: 2 | # Common configuration items (all datasets have these) 3 | scene: null 4 | cache_dir: null 5 | resize_shape: null 6 | patch_size: null 7 | num_frames: null 8 | all_frames: false 9 | use_gt_depth: false 10 | 11 | mask_estimation: null 12 | mask_flow_model: null 13 | mask_binary_open_value: null 14 | 15 | frame_sampler: pretrain_interval 16 | frame_max_interval: 4 17 | 18 | # Dataset-specific configuration items 19 | mask_blur_radius: 10.0 20 | clip_t: 2.0 # duration of each clip (sec) 21 | clip_interval: 1.0 # interval of each clip (sec) 22 | clip_max_n: 20 # max number of frame of each clip 23 | meta_file: ./cache/original_datasets/HOI4D/hoi4d_release.txt 24 | rgb_root: ./cache/original_datasets/HOI4D/HOI4D_release 25 | depth_root: ./cache/original_datasets/HOI4D/HOI4D_depth_video 26 | anno_root: ./cache/original_datasets/HOI4D/HOI4D_annotations 27 | cam_root: ./cache/original_datasets/HOI4D/camera_params 28 | pre_save_root: ./cache/processed_datasets 29 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def flow_loss_func(flow_preds, flow_gt, valid, 5 | gamma=0.9, 6 | max_flow=400, 7 | **kwargs, 8 | ): 9 | n_predictions = len(flow_preds) 10 | flow_loss = 0.0 11 | 12 | # exlude invalid pixels and extremely large diplacements 13 | mag = torch.sum(flow_gt ** 2, dim=1).sqrt() # [B, H, W] 14 | valid = (valid >= 0.5) & (mag < max_flow) 15 | 16 | for i in range(n_predictions): 17 | i_weight = gamma ** (n_predictions - i - 1) 18 | 19 | i_loss = (flow_preds[i] - flow_gt).abs() 20 | 21 | flow_loss += i_weight * (valid[:, None] * i_loss).mean() 22 | 23 | epe = torch.sum((flow_preds[-1] - flow_gt) ** 2, dim=1).sqrt() 24 | 25 | if valid.max() < 0.5: 26 | pass 27 | 28 | epe = epe.view(-1)[valid.view(-1)] 29 | 30 | metrics = { 31 | 'epe': epe.mean().item(), 32 | '1px': (epe > 1).float().mean().item(), 33 | '3px': (epe > 3).float().mean().item(), 34 | '5px': (epe > 5).float().mean().item(), 35 | } 36 | 37 | return flow_loss, metrics 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Cameron Smith, David Charatan, Ayush Tewari, and Vincent Sitzmann 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /egomono4d/model/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from dataclasses import dataclass 3 | from typing import Generic, TypeVar 4 | 5 | from jaxtyping import Float 6 | from torch import Tensor, nn 7 | 8 | from ...dataset.types import Batch 9 | from ...flow.flow_predictor import Flows 10 | from ...tracking.track_predictor import Tracks 11 | 12 | T = TypeVar("T") 13 | 14 | 15 | @dataclass 16 | class BackboneOutput: 17 | depths: Float[Tensor, "batch frame height width"] 18 | weights: Float[Tensor, "batch frame-1 height width"] 19 | intrinsics: tuple[Float[Tensor, "batch 2"], Float[Tensor, "batch 2"]] | None # (focal, principle) 20 | 21 | 22 | class Backbone(nn.Module, ABC, Generic[T]): 23 | cfg: T 24 | 25 | def __init__( 26 | self, 27 | cfg: T, 28 | num_frames: int | None, 29 | image_shape: tuple[int, int] | None, 30 | patch_size: tuple[int, int] | None, 31 | ) -> None: 32 | super().__init__() 33 | self.cfg = cfg 34 | self.num_frames = num_frames 35 | self.image_shape = image_shape 36 | self.patch_size = patch_size 37 | 38 | @abstractmethod 39 | def forward(self, batch: Batch, flows: Flows | list[Tracks]) -> BackboneOutput: 40 | pass 41 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/utils/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import sys 4 | import json 5 | 6 | 7 | def read_text_lines(filepath): 8 | with open(filepath, 'r') as f: 9 | lines = f.readlines() 10 | lines = [l.rstrip() for l in lines] 11 | return lines 12 | 13 | 14 | def check_path(path): 15 | if not os.path.exists(path): 16 | os.makedirs(path, exist_ok=True) # explicitly set exist_ok when multi-processing 17 | 18 | 19 | def save_command(save_path, filename='command_train.txt'): 20 | check_path(save_path) 21 | command = sys.argv 22 | save_file = os.path.join(save_path, filename) 23 | # Save all training commands when resuming training 24 | with open(save_file, 'a') as f: 25 | f.write(' '.join(command)) 26 | f.write('\n\n') 27 | 28 | 29 | def save_args(args, filename='args.json'): 30 | args_dict = vars(args) 31 | check_path(args.checkpoint_dir) 32 | save_path = os.path.join(args.checkpoint_dir, filename) 33 | 34 | # Save all training args when resuming training 35 | with open(save_path, 'a') as f: 36 | json.dump(args_dict, f, indent=4, sort_keys=False) 37 | f.write('\n\n') 38 | 39 | 40 | def int_list(s): 41 | """Convert string to int list""" 42 | return [int(x) for x in s.split(',')] 43 | -------------------------------------------------------------------------------- /egomono4d/model/intrinsics/intrinsics_regressed.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Literal 3 | 4 | import torch 5 | from einops import repeat 6 | from jaxtyping import Float 7 | from torch import Tensor, nn 8 | 9 | from ...dataset.types import Batch 10 | from ...flow.flow_predictor import Flows 11 | from ..backbone.backbone import BackboneOutput 12 | 
from .common import focal_lengths_to_intrinsics 13 | from .intrinsics import Intrinsics 14 | 15 | 16 | @dataclass 17 | class IntrinsicsRegressedCfg: 18 | name: Literal["regressed"] 19 | initial_focal_length: float 20 | 21 | 22 | class IntrinsicsRegressed(Intrinsics[IntrinsicsRegressedCfg]): 23 | def __init__(self, cfg: IntrinsicsRegressedCfg) -> None: 24 | super().__init__(cfg) 25 | focal_length = torch.full( 26 | tuple(), 27 | cfg.initial_focal_length, 28 | dtype=torch.float32, 29 | ) 30 | self.focal_length = nn.Parameter(focal_length) 31 | 32 | def forward( 33 | self, 34 | batch: Batch, 35 | flows: Flows, 36 | backbone_output: BackboneOutput, 37 | global_step: int, 38 | ) -> Float[Tensor, "batch frame 3 3"]: 39 | b, f, _, h, w = batch.videos.shape 40 | intrinsics = focal_lengths_to_intrinsics(self.focal_length, (h, w)) 41 | return repeat(intrinsics, "i j -> b f i j", b=b, f=f) 42 | -------------------------------------------------------------------------------- /egomono4d/misc/local_logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import Any, Optional 4 | 5 | from lightning.pytorch.loggers import Logger 6 | from lightning.pytorch.utilities import rank_zero_only 7 | from PIL import Image 8 | 9 | LOG_PATH = Path("outputs/local") 10 | 11 | 12 | class LocalLogger(Logger): 13 | def __init__(self) -> None: 14 | super().__init__() 15 | self.experiment = None 16 | os.system(f"rm -r {LOG_PATH}") 17 | 18 | @property 19 | def name(self): 20 | return "LocalLogger" 21 | 22 | @property 23 | def version(self): 24 | return 0 25 | 26 | @rank_zero_only 27 | def log_hyperparams(self, params): 28 | pass 29 | 30 | @rank_zero_only 31 | def log_metrics(self, metrics, step): 32 | pass 33 | 34 | @rank_zero_only 35 | def log_image( 36 | self, 37 | key: str, 38 | images: list[Any], 39 | step: Optional[int] = None, 40 | **kwargs, 41 | ): 42 | # The function signature is the same as the wandb logger's, but the step is 43 | # actually required. 
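# The zero-padded pattern {index:0>2}_{step:0>6}.png below keeps the saved
# images ordered by image index and then by step when the directory is listed.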
44 | assert step is not None 45 | for index, image in enumerate(images): 46 | path = LOG_PATH / f"{key}/{index:0>2}_{step:0>6}.png" 47 | path.parent.mkdir(exist_ok=True, parents=True) 48 | Image.fromarray(image).save(path) 49 | -------------------------------------------------------------------------------- /egomono4d/dataset/dataset_merged.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import pdb 4 | from typing import List 5 | from .types import Stage 6 | from torch.utils.data import Dataset 7 | 8 | 9 | class DatasetMerged(Dataset): 10 | 11 | def __init__(self, 12 | datasets: List[Dataset], 13 | stage: Stage, 14 | global_rank: int, 15 | world_size: int, 16 | data_ratio: float=1.0 17 | ) -> None: 18 | self.datasets = datasets 19 | self.stage = stage 20 | self.global_rank = global_rank 21 | self.world_size = world_size 22 | index_list = [] 23 | 24 | for ids, dataset in enumerate(self.datasets): 25 | index_list = index_list + [(ids, i) for i in range(int(len(dataset)*data_ratio))] 26 | 27 | random.seed(0) 28 | random.shuffle(index_list) 29 | self.index_list = index_list 30 | 31 | print(f"################### [Stage {stage}: Num Data = {len(self.index_list)}] ###################") 32 | 33 | 34 | def __len__(self): 35 | return len(self.index_list) 36 | 37 | def __getitem__(self, index): 38 | dataset_id, data_id = self.index_list[index] 39 | # print(f"[Data Go] global_rank={self.global_rank} | dataloader_index={index} | data_index={(dataset_id, data_id)}") 40 | return self.datasets[dataset_id][data_id] 41 | -------------------------------------------------------------------------------- /egomono4d/model/intrinsics/intrinsics_model.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Literal 3 | 4 | from jaxtyping import Float 5 | from torch import Tensor 6 | import pdb 7 | import torch 8 | 9 | from ...dataset.types import Batch, BatchInference 10 | from ...flow.flow_predictor import Flows 11 | from ..backbone.backbone import BackboneOutput 12 | from .intrinsics import Intrinsics 13 | from ...tracking.track_predictor import Tracks 14 | 15 | 16 | @dataclass 17 | class IntrinsicsModelCfg: 18 | name: Literal["model"] 19 | 20 | 21 | class IntrinsicsModel(Intrinsics[IntrinsicsModelCfg]): 22 | def forward( 23 | self, 24 | batch: Batch | BatchInference, 25 | flows: Flows | list[Tracks], 26 | backbone_output: BackboneOutput, 27 | global_step: int, 28 | ) -> Float[Tensor, "batch frame 3 3"]: 29 | # Build the intrinsics from the focal length and principal point regressed by the backbone.
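# The backbone appears to predict focal lengths normalized by sqrt(h * w) and a
# principal point in [0, 1] image coordinates, so the matrix assembled below is a
# normalized K with fx / w and fy / h on its diagonal.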
30 | # pdb.set_trace() 31 | b, f, _, h, w = batch.videos.shape 32 | focal, principle = backbone_output.intrinsics 33 | focal = focal * (h * w) ** 0.5 34 | intrinsics = torch.stack([torch.eye(3, dtype=torch.float32, device=focal.device)]*b, dim=0) 35 | intrinsics = torch.stack([intrinsics]*f, dim=1) 36 | intrinsics[..., :2, 2] = principle.unsqueeze(-2) 37 | intrinsics[..., 0, 0] = focal[..., 0].unsqueeze(-1) / w 38 | intrinsics[..., 1, 1] = focal[..., 1].unsqueeze(-1) / h 39 | return intrinsics -------------------------------------------------------------------------------- /egomono4d/dataset/types.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | # from typing import Literal 3 | from typing_extensions import Literal 4 | from typing import List, Union, Optional 5 | 6 | from jaxtyping import Float, Int64, Int32 7 | from torch import Tensor 8 | 9 | from ..misc.manipulable import Manipulable 10 | 11 | Stage = Literal["train", "test", "val"] 12 | 13 | 14 | @dataclass 15 | class Batch(Manipulable): 16 | videos: Float[Tensor, "batch frame 3 height width"] 17 | depths: Float[Tensor, "batch frame height width"] 18 | pcds: Float[Tensor, "batch frame height width 3"] 19 | flys: Float[Tensor, "batch frame height width"] 20 | masks: Float[Tensor, "batch frame height width"] 21 | indices: Int64[Tensor, "batch frame"] 22 | 23 | scenes: Union[List[str], str] 24 | datasets: Union[List[str], str] 25 | use_gt_depth: bool 26 | 27 | intrinsics: Optional[Float[Tensor, "batch frame 3 3"]] = None 28 | 29 | gt_depths: Optional[Float[Tensor, "batch frame height width"]] = None 30 | gt_intrinsics: Optional[Float[Tensor, "batch frame 3 3"]] = None 31 | gt_extrinsics: Optional[Float[Tensor, "batch frame 4 4"]] = None 32 | hoi_masks: Optional[Float[Tensor, "batch frame height width"]] = None 33 | 34 | 35 | @dataclass 36 | class BatchInference(Manipulable): 37 | videos: Float[Tensor, "batch frame 3 height width"] 38 | start_indice: Int32 39 | aux_masks: Optional[Float[Tensor, "batch frame height width"]] = None 40 | -------------------------------------------------------------------------------- /egomono4d/utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from einops import einsum, rearrange 3 | from .model.projection import sample_image_grid, unproject, homogenize_points 4 | 5 | 6 | def load_pickle(pickle_file): 7 | try: 8 | with open(pickle_file, 'rb') as f: 9 | pickle_data = pickle.load(f) 10 | except UnicodeDecodeError as e: 11 | with open(pickle_file, 'rb') as f: 12 | pickle_data = pickle.load(f, encoding='latin1') 13 | except Exception as e: 14 | print('Unable to load data ', pickle_file, ':', e) 15 | raise 16 | return pickle_data 17 | 18 | 19 | def save_pickle(pickle_file, data): 20 | with open(pickle_file, 'wb') as pfile: 21 | pickle.dump(data, pfile) 22 | 23 | 24 | def batch_recover_pointclouds_sequence(depths, intrinsics, extrinsics, target_frame=0): 25 | b, f, h, w = depths.shape 26 | xy, _ = sample_image_grid((h, w), device=depths.device) 27 | gt_pcds_unp = unproject(xy, depths, rearrange(intrinsics, "b f i j -> b f () () i j")) 28 | 29 | extrinsics_source = rearrange(extrinsics, "b fs i j -> b fs () () i j") 30 | extrinsics_target = rearrange(extrinsics[:, target_frame:target_frame+1], "b ft i j -> b () ft () i j") 31 | relative_transformations = extrinsics_target.inverse() @ extrinsics_source 32 | 33 | pcds = einsum( 34 | relative_transformations, 35 | 
homogenize_points(gt_pcds_unp), 36 | "... i j, ... j -> ... i", 37 | )[..., :3] 38 | 39 | return pcds -------------------------------------------------------------------------------- /egomono4d/model/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import Backbone 2 | from .backbone_explicit_depth import BackboneExplicitDepth, BackboneExplicitDepthCfg 3 | from .backbone_midas import BackboneMidas, BackboneMidasCfg 4 | from .backbone_unidepth import BackboneUniDepth, BackboneUniDepthCfg 5 | from .backbone_nvds_unidepth import BackboneNvdsUniDepth, BackboneNvdsUniDepthCfg 6 | 7 | try: 8 | from .backbone_depthanythingv2 import BackboneDepthanythingV2, BackboneDepthanythingV2Cfg 9 | from .backbone_nvds_unet_dpt import BackboneNvdsUnetDPT, BackboneNvdsUnetDPTCfg 10 | except: 11 | BackboneDepthanythingV2 = None 12 | BackboneDepthanythingV2Cfg = None 13 | BackboneNvdsUnetDPT = None 14 | BackboneNvdsUnetDPTCfg = None 15 | 16 | BACKBONES = { 17 | "explicit_depth": BackboneExplicitDepth, 18 | "midas": BackboneMidas, 19 | "unidepth": BackboneUniDepth, 20 | "depthanythingv2": BackboneDepthanythingV2, 21 | "nvds_unet_dpt": BackboneNvdsUnetDPT, 22 | "nvds_unidepth": BackboneNvdsUniDepth 23 | } 24 | 25 | BackboneCfg = BackboneExplicitDepthCfg | BackboneMidasCfg | BackboneNvdsUniDepthCfg | \ 26 | BackboneNvdsUnetDPTCfg | BackboneDepthanythingV2Cfg | BackboneUniDepthCfg 27 | 28 | 29 | def get_backbone( 30 | cfg: BackboneCfg, 31 | num_frames: int | None, 32 | image_shape: tuple[int, int] | None, 33 | patch_size: tuple[int, int] | None = None, 34 | ) -> Backbone: 35 | return BACKBONES[cfg.name](cfg, num_frames, image_shape, patch_size) 36 | -------------------------------------------------------------------------------- /egomono4d/model/backbone/backbone_explicit_depth.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Literal 3 | 4 | import torch 5 | from torch import nn 6 | 7 | from ...dataset.types import Batch 8 | from ...flow.flow_predictor import Flows 9 | from .backbone import Backbone, BackboneOutput 10 | 11 | 12 | @dataclass 13 | class BackboneExplicitDepthCfg: 14 | name: Literal["explicit_depth"] 15 | initial_depth: float 16 | weight_sensitivity: float 17 | 18 | 19 | class BackboneExplicitDepth(Backbone[BackboneExplicitDepthCfg]): 20 | def __init__( 21 | self, 22 | cfg: BackboneExplicitDepthCfg, 23 | num_frames: int | None, 24 | image_shape: tuple[int, int] | None, 25 | patch_size: tuple[int, int] | None, 26 | ) -> None: 27 | super().__init__(cfg, num_frames=num_frames, image_shape=image_shape, patch_size=patch_size) 28 | depth = torch.full( 29 | (num_frames, *image_shape), cfg.initial_depth, dtype=torch.float32 30 | ) 31 | self.depth = nn.Parameter(depth) 32 | weights = torch.full((num_frames - 1, *image_shape), 0, dtype=torch.float32) 33 | self.weights = nn.Parameter(weights) 34 | 35 | def forward(self, batch: Batch, flows: Flows) -> BackboneOutput: 36 | b, _, _, _, _ = batch.videos.shape 37 | assert b == 1 38 | 39 | return BackboneOutput( 40 | self.depth[None], 41 | (self.cfg.weight_sensitivity * self.weights).sigmoid()[None], 42 | ) 43 | -------------------------------------------------------------------------------- /egomono4d/misc/disk_cache.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import json 3 | import os 4 | from typing import Optional 5 | from pathlib import 
Path 6 | from typing import Any, Callable, TypeVar 7 | 8 | import torch 9 | 10 | T = TypeVar("T") 11 | 12 | 13 | def make_cache(location: Optional[Path] = None): 14 | if location is not None and not os.path.exists(location): 15 | os.makedirs(location, exist_ok=True) 16 | def cache(key: Any, device, fallback: Callable[[], T]) -> T: 17 | # If there's no cache location, the cache is disabled. 18 | if location is None: 19 | return fallback() 20 | 21 | key_str = hashlib.sha256(json.dumps(key).encode("utf-8")).digest().hex() 22 | 23 | path = Path(location) / f"{key_str}.torch" 24 | try: 25 | # Attempt to load the cached item. 26 | key_loaded, value = torch.load(path, map_location=device) 27 | 28 | # If there was a hash collision and the keys don't actually match, throw an 29 | # error so that the fallback can be used. 30 | if key != key_loaded: 31 | raise ValueError("Keys did not match!") 32 | 33 | return value 34 | except (FileNotFoundError, ValueError): 35 | # Use the fallback to compute the value. 36 | value = fallback() 37 | 38 | # Cache the value. 39 | # path.parent.mkdir(exist_ok=True, parents=True) 40 | torch.save((key, value), path) 41 | 42 | return value 43 | 44 | return cache 45 | -------------------------------------------------------------------------------- /egomono4d/loss/loss_cc.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Literal 3 | import torch 4 | import pdb 5 | 6 | from jaxtyping import Float 7 | from torch import Tensor 8 | import torch.nn.functional as F 9 | from torchvision.utils import save_image 10 | 11 | from ..dataset.types import Batch 12 | from ..flow import Flows 13 | from ..model.model import ModelOutput 14 | from ..tracking import Tracks 15 | from .loss import Loss, LossCfgCommon 16 | 17 | @dataclass 18 | class LossCCCfg(LossCfgCommon): # CC: Clip Consistency 19 | name: Literal["cc"] 20 | 21 | 22 | class LossCC(Loss[LossCCCfg]): 23 | def __init__(self, cfg: LossCCCfg) -> None: 24 | super().__init__(cfg) 25 | self.loss = torch.nn.L1Loss(reduction="none") 26 | 27 | def compute_unweighted_loss( 28 | self, 29 | batch: Batch, 30 | flows: Flows, 31 | tracks: list[Tracks] | None, 32 | model_output: ModelOutput, 33 | current_epoch: int, 34 | return_val: bool 35 | ) -> tuple[Float[Tensor, ""], dict]: 36 | 37 | intrinsics = model_output.intrinsics 38 | b, f, _, _ = intrinsics.shape 39 | assert b % 2 == 0 40 | rb = b // 2 41 | 42 | intrinsics_subclip_1 = intrinsics[::2] # (b//2, f, 3, 3) 43 | intrinsics_subclip_2 = intrinsics[1::2] # (b//2, f, 3, 3) 44 | loss = self.loss(intrinsics_subclip_1, intrinsics_subclip_2) 45 | loss = loss.sum() / (rb * f) # fx, fy, cx, cy 46 | 47 | return loss, {"cc": loss} -------------------------------------------------------------------------------- /egomono4d/visualization/drawing/coordinate_conversion.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Protocol, runtime_checkable 2 | 3 | import torch 4 | from jaxtyping import Float 5 | from torch import Tensor 6 | 7 | from .types import Pair, sanitize_pair 8 | 9 | 10 | @runtime_checkable 11 | class ConversionFunction(Protocol): 12 | def __call__( 13 | self, 14 | xy: Float[Tensor, "*batch 2"], 15 | ) -> Float[Tensor, "*batch 2"]: 16 | pass 17 | 18 | 19 | def generate_conversions( 20 | shape: tuple[int, int], 21 | device: torch.device, 22 | x_range: Optional[Pair] = None, 23 | y_range: Optional[Pair] = None, 24 | ) -> tuple[ 25 | ConversionFunction, #
conversion from world coordinates to pixel coordinates 26 | ConversionFunction, # conversion from pixel coordinates to world coordinates 27 | ]: 28 | h, w = shape 29 | x_range = sanitize_pair((0, w) if x_range is None else x_range, device) 30 | y_range = sanitize_pair((0, h) if y_range is None else y_range, device) 31 | minima, maxima = torch.stack((x_range, y_range), dim=-1) 32 | wh = torch.tensor((w, h), dtype=torch.float32, device=device) 33 | 34 | def convert_world_to_pixel( 35 | xy: Float[Tensor, "*batch 2"], 36 | ) -> Float[Tensor, "*batch 2"]: 37 | return (xy - minima) / (maxima - minima) * wh 38 | 39 | def convert_pixel_to_world( 40 | xy: Float[Tensor, "*batch 2"], 41 | ) -> Float[Tensor, "*batch 2"]: 42 | return xy / wh * (maxima - minima) + minima 43 | 44 | return convert_world_to_pixel, convert_pixel_to_world 45 | -------------------------------------------------------------------------------- /egomono4d/misc/config_tools.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | from typing import Type, TypeVar 4 | 5 | from dacite import Config, from_dict 6 | from omegaconf import DictConfig, OmegaConf 7 | 8 | TYPE_HOOKS = { 9 | Path: Path, 10 | } 11 | 12 | 13 | T = TypeVar("T") 14 | 15 | 16 | def get_typed_config( 17 | data_class: Type[T], 18 | cfg: DictConfig, 19 | extra_type_hooks: dict = {}, 20 | ) -> T: 21 | return from_dict( 22 | data_class, 23 | OmegaConf.to_container(cfg), 24 | config=Config(type_hooks={**TYPE_HOOKS, **extra_type_hooks}, cast=[tuple]), 25 | ) 26 | 27 | 28 | def separate_multiple_defaults(data_class_union): 29 | """Return a function that will pull individual configurations out of a merged dict. 30 | For example, the merged dict might look like this: 31 | 32 | { 33 | a: ... 34 | b: ... 35 | } 36 | 37 | The returned function will generate this: 38 | 39 | [{ name: a, ... }, { name: b, ... }] 40 | 41 | In other words, this function makes the types for default lists with single and 42 | multiple items be typed identically. 43 | """ 44 | 45 | def separate_fn(joined: dict) -> list: 46 | # The dummy allows the union to be converted. 47 | @dataclass 48 | class Dummy: 49 | dummy: data_class_union 50 | 51 | return [ 52 | get_typed_config(Dummy, DictConfig({"dummy": {"name": name, **cfg}})).dummy 53 | for name, cfg in joined.items() 54 | ] 55 | 56 | return separate_fn 57 | -------------------------------------------------------------------------------- /egomono4d/config/tools.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | from typing import Type, TypeVar 4 | 5 | from dacite import Config, from_dict 6 | from omegaconf import DictConfig, OmegaConf 7 | 8 | TYPE_HOOKS = { 9 | Path: Path, 10 | } 11 | 12 | 13 | T = TypeVar("T") 14 | 15 | 16 | def get_typed_config( 17 | data_class: Type[T], 18 | cfg: DictConfig, 19 | extra_type_hooks: dict = {}, 20 | ) -> T: 21 | cfg_res = from_dict(data_class, OmegaConf.to_container(cfg), config=Config(type_hooks={**TYPE_HOOKS, **extra_type_hooks}, cast=[tuple]),) 22 | return cfg_res 23 | 24 | 25 | def separate_multiple_defaults(data_class_union): 26 | """Return a function that will pull individual configurations out of a merged dict. 27 | For example, the merged dict might look like this: 28 | 29 | { 30 | a: ... 31 | b: ... 32 | } 33 | 34 | The returned function will generate this: 35 | 36 | [{ name: a, ... }, { name: b, ... 
}] 37 | 38 | In other words, this function makes the types for default lists with single and 39 | multiple items be typed identically. 40 | """ 41 | 42 | def separate_fn(joined: dict) -> list: 43 | # The dummy allows the union to be converted. 44 | @dataclass 45 | class Dummy: 46 | dummy: data_class_union 47 | 48 | dummy_dict = [ 49 | get_typed_config(Dummy, DictConfig({"dummy": {"name": name, **cfg}})).dummy 50 | for name, cfg in joined.items() 51 | ] 52 | return dummy_dict 53 | 54 | return separate_fn 55 | -------------------------------------------------------------------------------- /egomono4d/loss/mapping/mapping.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Generic, TypeVar 3 | 4 | import torch 5 | from jaxtyping import Float 6 | from torch import Tensor, nn 7 | 8 | 9 | def fix_aspect_ratio( 10 | points: Float[Tensor, "*batch 2"], 11 | image_shape: tuple[int, int], 12 | ) -> Float[Tensor, "*batch 2"]: 13 | """When computing losses on normalized image coordinates (width in range [0, 1] and 14 | height in range [0, 1]), distances are skewed based on the aspect ratio. This 15 | function scales space based on the aspect ratio to correct for this skew. 16 | """ 17 | h, w = image_shape 18 | scale = (h * w) ** 0.5 19 | correction = torch.tensor( 20 | (w / scale, h / scale), 21 | dtype=points.dtype, 22 | device=points.device, 23 | ) 24 | return points * correction 25 | 26 | 27 | T = TypeVar("T") 28 | 29 | 30 | class Mapping(nn.Module, ABC, Generic[T]): 31 | def __init__(self, cfg: T) -> None: 32 | super().__init__() 33 | self.cfg = cfg 34 | 35 | def forward( 36 | self, 37 | a: Float[Tensor, "*#batch 2"], 38 | b: Float[Tensor, "*#batch 2"], 39 | image_shape: tuple[int, int], 40 | ) -> Float[Tensor, " *batch"]: 41 | a = fix_aspect_ratio(a, image_shape) 42 | b = fix_aspect_ratio(b, image_shape) 43 | return self.forward_undistorted(a - b) 44 | 45 | @abstractmethod 46 | def forward_undistorted( 47 | self, 48 | delta: Float[Tensor, "*batch 2"], 49 | ) -> Float[Tensor, " *batch"]: 50 | pass 51 | -------------------------------------------------------------------------------- /egomono4d/eval/eval_depth.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pdb 3 | from ..loss.loss_midas import compute_scale_and_shift 4 | 5 | EPS = 1e-6 6 | 7 | def eval_depth_conductor(pred_depth, gt_depth, gt_flys): # (b, f, h, w) * 3 8 | 9 | b, f, h, w = pred_depth.shape 10 | 11 | pred_depth_align = pred_depth.reshape(b,f*h,w) 12 | gt_depth_align = gt_depth.reshape(b,f*h,w) 13 | gt_flys_align = gt_flys.reshape(b,f*h,w) 14 | scale_video, shift_video = compute_scale_and_shift(pred_depth_align, gt_depth_align, gt_flys_align) 15 | 16 | pred_depth_align = scale_video.view(-1, 1, 1) * pred_depth_align + shift_video.view(-1, 1, 1) 17 | err = torch.abs(pred_depth_align - gt_depth_align) 18 | err_rel = err / (gt_depth_align + EPS) 19 | 20 | err_sq = err ** 2 21 | thresh = torch.maximum((gt_depth_align / (pred_depth_align + EPS)), (pred_depth_align / (gt_depth_align + EPS))) 22 | gt_flys = gt_flys.reshape(b,f*h,w) 23 | 24 | return { 25 | 'DEPTH_AbsRel(%)': 100 * ((err_rel*gt_flys).sum()/(gt_flys.sum())).item(), 26 | 'DEPTH_RMSE(mm)': 1000 * torch.sqrt((err_sq*gt_flys).sum()/(gt_flys.sum())).item(), 27 | 'DEPTH_Delta_[.025](%)': (100*(((thresh < 1.025).float()*gt_flys).sum())/(gt_flys.sum())).item(), 28 | 'DEPTH_Delta_[.05](%)': (100*(((thresh < 
1.05).float()*gt_flys).sum())/(gt_flys.sum())).item(), 29 | 'DEPTH_Delta_[.1](%)': (100*(((thresh < 1.1).float()*gt_flys).sum())/(gt_flys.sum())).item(), 30 | 'DEPTH_Delta_[.25](%)': (100*(((thresh < 1.25).float()*gt_flys).sum())/(gt_flys.sum())).item(), 31 | 'DEPTH_Delta_[.25]^2(%)': (100*(((thresh < 1.25**2).float()*gt_flys).sum())/(gt_flys.sum())).item(), 32 | } -------------------------------------------------------------------------------- /egomono4d/model/extrinsics/extrinsics_procrustes_ransac.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Literal 3 | import numpy as np 4 | 5 | import torch 6 | from jaxtyping import Float 7 | from torch import Tensor 8 | 9 | from ...dataset.types import Batch 10 | from ...flow.flow_predictor import Flows 11 | from ..backbone.backbone import BackboneOutput 12 | from ..projection import align_surfaces_eval 13 | from .extrinsics import Extrinsics 14 | 15 | 16 | @dataclass 17 | class ExtrinsicsProcrustesRANSACCfg: 18 | name: Literal["procrustes_ransac"] 19 | max_iter: int | None 20 | num_points: int | None 21 | 22 | 23 | class ExtrinsicsProcrustesRANSAC(Extrinsics[ExtrinsicsProcrustesRANSACCfg]): 24 | def forward( 25 | self, 26 | batch: Batch, 27 | flows: Flows, 28 | backbone_output: BackboneOutput, 29 | surfaces: Float[Tensor, "batch frame height width 3"], 30 | ) -> Float[Tensor, "batch frame 4 4"]: 31 | device = surfaces.device 32 | _, _, h, w, _ = surfaces.shape 33 | 34 | indices = torch.linspace(0, h*w-1, self.cfg.num_points, dtype=torch.int64, device=device,) 35 | best_extrinsics, best_score = align_surfaces_eval(surfaces, flows.backward, backbone_output.weights, batch.flys, indices) 36 | for i in range(self.cfg.max_iter): 37 | maybe_inliers = np.random.choice(h*w, size=self.cfg.num_points, replace=False) 38 | extrinsics, score = align_surfaces_eval(surfaces, flows.backward, backbone_output.weights, batch.flys, maybe_inliers) 39 | if score > best_score: 40 | # print(f"update score: {score} > {best_score}") 41 | best_score = score 42 | best_extrinsics = extrinsics 43 | 44 | return best_extrinsics 45 | -------------------------------------------------------------------------------- /egomono4d/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from ..frame_sampler import get_frame_sampler 2 | from typing import Union, Optional, List 3 | import os 4 | 5 | from .dataset_merged import DatasetMerged 6 | from .types import Stage 7 | from .dataset_arctic import DatasetArctic, DatasetArcticCfg 8 | from .dataset_pov_surgery import DatasetPOVSurgery, DatasetPOVSurgeryCfg 9 | from .dataset_hoi4d import DatasetHOI4D, DatasetHOI4DCfg 10 | from .dataset_fpha import DatasetFPHA, DatasetFPHACfg 11 | from .dataset_h2o import DatasetH2O, DatasetH2OCfg 12 | from .dataset_egopat3d import DatasetEgoPAT3D, DatasetEgoPAT3DCfg 13 | from .dataset_epic_kitchen import DatasetEpicKitchen, DatasetEpicKitchenCfg 14 | 15 | DATASETS = { 16 | "arctic": DatasetArctic, 17 | "pov_surgery": DatasetPOVSurgery, 18 | "hoi4d": DatasetHOI4D, 19 | "h2o": DatasetH2O, 20 | "fpha": DatasetFPHA, 21 | "egopat3d": DatasetEgoPAT3D, 22 | "epic_kitchen": DatasetEpicKitchen 23 | } 24 | 25 | DatasetCfg = Union[ 26 | DatasetArcticCfg, 27 | DatasetPOVSurgeryCfg, 28 | DatasetHOI4DCfg, 29 | DatasetH2OCfg, 30 | DatasetEgoPAT3DCfg, 31 | DatasetFPHACfg, 32 | DatasetEpicKitchenCfg 33 | ] 34 | 35 | def get_dataset( 36 | dataset_cfgs: List[DatasetCfg], 37 | stage:
Stage, 38 | global_rank: int, 39 | world_size: int, 40 | data_ratio: Optional[float]=1.0, 41 | debug: Optional[bool]=False, 42 | ) -> DatasetMerged: 43 | 44 | datasets = [] 45 | for cfg in dataset_cfgs: 46 | frame_sampler = get_frame_sampler(cfg.frame_sampler, cfg.num_frames, stage) 47 | dataset = DATASETS[cfg.name](cfg, stage, frame_sampler, global_rank, world_size, debug) 48 | datasets.append(dataset) 49 | 50 | return DatasetMerged(datasets, stage=stage, global_rank=global_rank, world_size=world_size, data_ratio=data_ratio) 51 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/scripts/submission.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | # generate prediction results for submission on sintel and kitti online servers 5 | 6 | 7 | # GMFlow without refinement 8 | 9 | # submission to sintel 10 | CUDA_VISIBLE_DEVICES=0 python main.py \ 11 | --submission \ 12 | --output_path submission/sintel-gmflow-norefine \ 13 | --val_dataset sintel \ 14 | --resume pretrained/gmflow_sintel-0c07dcb3.pth 15 | 16 | # submission to kitti 17 | CUDA_VISIBLE_DEVICES=0 python main.py \ 18 | --submission \ 19 | --output_path submission/kitti-gmflow-norefine \ 20 | --val_dataset kitti \ 21 | --resume pretrained/gmflow_kitti-285701a8.pth 22 | 23 | 24 | # you can also visualize the predictions before submission 25 | # CUDA_VISIBLE_DEVICES=0 python main.py \ 26 | # --submission \ 27 | # --output_path submission/sintel-gmflow-norefine-vis \ 28 | # --save_vis_flow \ 29 | # --no_save_flo \ 30 | # --val_dataset sintel \ 31 | # --resume pretrained/gmflow_sintel.pth 32 | 33 | 34 | 35 | 36 | # GMFlow with refinement 37 | 38 | # submission to sintel 39 | CUDA_VISIBLE_DEVICES=0 python main.py \ 40 | --submission \ 41 | --output_path submission/sintel-gmflow-withrefine \ 42 | --val_dataset sintel \ 43 | --resume pretrained/gmflow_with_refine_sintel-3ed1cf48.pth \ 44 | --padding_factor 32 \ 45 | --upsample_factor 4 \ 46 | --num_scales 2 \ 47 | --attn_splits_list 2 8 \ 48 | --corr_radius_list -1 4 \ 49 | --prop_radius_list -1 1 50 | 51 | # submission to kitti 52 | CUDA_VISIBLE_DEVICES=0 python main.py \ 53 | --submission \ 54 | --output_path submission/kitti-gmflow-withrefine \ 55 | --val_dataset kitti \ 56 | --resume pretrained/gmflow_with_refine_kitti-8d3b9786.pth \ 57 | --padding_factor 32 \ 58 | --upsample_factor 4 \ 59 | --num_scales 2 \ 60 | --attn_splits_list 2 8 \ 61 | --corr_radius_list -1 4 \ 62 | --prop_radius_list -1 1 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /egomono4d/visualization/drawing/types.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable, Union 2 | 3 | import torch 4 | from einops import repeat 5 | from jaxtyping import Float, Shaped 6 | from torch import Tensor 7 | 8 | Real = Union[float, int] 9 | 10 | Vector = Union[ 11 | Real, 12 | Iterable[Real], 13 | Shaped[Tensor, "3"], 14 | Shaped[Tensor, "batch 3"], 15 | ] 16 | 17 | 18 | def sanitize_vector( 19 | vector: Vector, 20 | dim: int, 21 | device: torch.device, 22 | ) -> Float[Tensor, "*#batch dim"]: 23 | if isinstance(vector, Tensor): 24 | vector = vector.type(torch.float32).to(device) 25 | else: 26 | vector = torch.tensor(vector, dtype=torch.float32, device=device) 27 | while vector.ndim < 2: 28 | vector = vector[None] 29 | if vector.shape[-1] == 1: 30 | vector = repeat(vector, "... () -> ... 
c", c=dim) 31 | assert vector.shape[-1] == dim 32 | assert vector.ndim == 2 33 | return vector 34 | 35 | 36 | Scalar = Union[ 37 | Real, 38 | Iterable[Real], 39 | Shaped[Tensor, ""], 40 | Shaped[Tensor, " batch"], 41 | ] 42 | 43 | 44 | def sanitize_scalar(scalar: Scalar, device: torch.device) -> Float[Tensor, "*#batch"]: 45 | if isinstance(scalar, Tensor): 46 | scalar = scalar.type(torch.float32).to(device) 47 | else: 48 | scalar = torch.tensor(scalar, dtype=torch.float32, device=device) 49 | while scalar.ndim < 1: 50 | scalar = scalar[None] 51 | assert scalar.ndim == 1 52 | return scalar 53 | 54 | 55 | Pair = Union[ 56 | Iterable[Real], 57 | Shaped[Tensor, "2"], 58 | ] 59 | 60 | 61 | def sanitize_pair(pair: Pair, device: torch.device) -> Float[Tensor, "2"]: 62 | if isinstance(pair, Tensor): 63 | pair = pair.type(torch.float32).to(device) 64 | else: 65 | pair = torch.tensor(pair, dtype=torch.float32, device=device) 66 | assert pair.shape == (2,) 67 | return pair 68 | -------------------------------------------------------------------------------- /config/datagen_egopat3d.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataset: [egopat3d] 3 | - flow: gmflow 4 | - tracking: cotracker 5 | - model/backbone: nvds_unidepth 6 | - model/intrinsics: model 7 | - model/extrinsics: procrustes_flow 8 | - loss: [dynamic_area, cc, tracking_3d, flow_3d, shape] 9 | - visualizer: [summary] 10 | - _self_ 11 | 12 | base_cache_dir: ./cache 13 | save_dir: ./cache/models 14 | 15 | preprocess: 16 | resize_shape: [300, 400] # First resize the image into resize_shape. 17 | patch_size: 32 # Then conduct center_crop with w&h divided by patch_size equal to 0. 18 | num_frames: 5 19 | 20 | dataset: 21 | egopat3d: 22 | clip_frame: 20 23 | 24 | wandb: 25 | project: egomono4d 26 | mode: online 27 | name: placeholder 28 | group: null 29 | tags: null 30 | 31 | checkpoint: 32 | load: null 33 | 34 | trainer: 35 | val_check_interval: 0.1 36 | gradient_clip_val: 10.0 37 | max_epochs: 25 38 | accumulate_grad_batches: 1 39 | num_nodes: 1 40 | gpus: 8 41 | 42 | loss: 43 | dynamic_area: 44 | weight: 0.005 45 | enable_after: 0 46 | cc: 47 | weight: 1.0 48 | enable_after: 0 49 | tracking_3d: 50 | weight: 5.0 51 | enable_after: 0 52 | flow_3d: 53 | weight: 5.0 54 | enable_after: 0 55 | shape: 56 | weight: 4.0 57 | enable_after: 0 58 | dynamic_coef: 1.0 59 | decay_end_epochs: -1 60 | decay_low_weight: 1.0 61 | 62 | model_wrapper: 63 | lr: 5e-5 64 | cache_track: false 65 | 66 | model: 67 | use_correspondence_weights: true 68 | 69 | data_module: 70 | train: 71 | num_workers: 4 72 | persistent_workers: true 73 | batch_size: 2 # batch-size of per-gpu 74 | seed: 233 75 | val: 76 | num_workers: 4 77 | persistent_workers: true 78 | batch_size: 2 79 | seed: 233 80 | 81 | hydra: 82 | run: 83 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} -------------------------------------------------------------------------------- /config/pretrain.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataset: [h2o, hoi4d, fpha, egopat3d, epic_kitchen] 3 | - flow: gmflow 4 | - tracking: cotracker 5 | - model/backbone: nvds_unidepth 6 | - model/intrinsics: model 7 | - model/extrinsics: procrustes_flow 8 | - loss: [dynamic_area, cc, tracking_3d, flow_3d, shape] 9 | - visualizer: [summary] 10 | - _self_ 11 | 12 | base_cache_dir: ./cache 13 | save_dir: ./cache/models 14 | 15 | preprocess: 16 | resize_shape: [300, 400] # First resize the image into 
resize_shape. 17 | patch_size: 32 # Then conduct center_crop with w&h divided by patch_size equal to 0. 18 | num_frames: 4 19 | 20 | wandb: 21 | project: egomono4d 22 | mode: online 23 | name: placeholder 24 | group: null 25 | tags: null 26 | 27 | checkpoint: 28 | load: null 29 | 30 | trainer: 31 | val_check_interval: 0.1 32 | # check_val_every_n_epoch: 1 33 | gradient_clip_val: 10.0 34 | max_epochs: 25 35 | accumulate_grad_batches: 1 36 | num_nodes: 1 37 | gpus: 8 38 | 39 | loss: 40 | dynamic_area: 41 | weight: 0.005 42 | enable_after: 0 43 | cc: 44 | weight: 1.0 45 | enable_after: 0 46 | tracking_3d: 47 | weight: 5.0 48 | enable_after: 0 49 | flow_3d: 50 | weight: 5.0 51 | enable_after: 0 52 | shape: 53 | weight: 4.0 54 | enable_after: 0 55 | dynamic_coef: 1.0 56 | decay_end_epochs: -1 57 | decay_low_weight: 1.0 58 | 59 | model_wrapper: 60 | lr: 5e-5 61 | cache_track: false 62 | 63 | model: 64 | use_correspondence_weights: true 65 | 66 | data_module: 67 | train: 68 | num_workers: 4 69 | persistent_workers: true 70 | batch_size: 2 # batch-size of per-gpu 71 | seed: 233 72 | val: 73 | num_workers: 4 74 | persistent_workers: true 75 | batch_size: 2 76 | seed: 233 77 | 78 | hydra: 79 | run: 80 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} -------------------------------------------------------------------------------- /egomono4d/tracking/track_predictor.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from dataclasses import dataclass 3 | import torch 4 | from typing import Generic, TypeVar, Optional 5 | 6 | from jaxtyping import Bool, Float 7 | from torch import Tensor, nn 8 | 9 | from ..misc.manipulable import Manipulable 10 | 11 | T = TypeVar("T") 12 | 13 | 14 | def sample_image_grid_tracker( 15 | shape, 16 | device: torch.device = torch.device("cpu"), 17 | ): 18 | """Get normalized (range 0 to 1) coordinates and integer indices for an image.""" 19 | indices = [torch.arange(length, device=device) for length in shape] 20 | stacked_indices = torch.stack(torch.meshgrid(*indices, indexing="ij"), dim=-1) 21 | coordinates = [(idx + 0.5) / length for idx, length in zip(indices, shape)] 22 | coordinates = reversed(coordinates) 23 | coordinates = torch.stack(torch.meshgrid(*coordinates, indexing="xy"), dim=-1) 24 | return coordinates 25 | 26 | 27 | @dataclass 28 | class Tracks(Manipulable): 29 | xy: Optional[Float[Tensor, "batch frame point 2"]] = None 30 | visibility: Optional[Bool[Tensor, "batch frame point"]] = None 31 | 32 | # This is the first frame in the track sequence, not the query frame used to 33 | # generate the sequence, which is often different. 
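# (E.g., tracks covering frames [k, k + T) store start_frame = k, even when the
# query frame used by the tracker lies somewhere inside that window.)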
34 | start_frame: int = 0 35 | 36 | def build_from_track_list(self, track_list, device='cpu'): 37 | self.xy = torch.concatenate([track.xy for track in track_list], dim=0) 38 | self.visibility = torch.concatenate([track.visibility for track in track_list], dim=0) 39 | 40 | 41 | class TrackPredictor(nn.Module, ABC, Generic[T]): 42 | def __init__(self, cfg: T) -> None: 43 | super().__init__() 44 | self.cfg = cfg 45 | 46 | @abstractmethod 47 | def forward( 48 | self, 49 | videos: Float[Tensor, "batch frame 3 height width"], 50 | query_frame: int, 51 | ) -> Tracks: 52 | pass 53 | -------------------------------------------------------------------------------- /egomono4d/misc/wandb_tools.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import wandb 4 | 5 | from ..config.common import WandbCfg 6 | 7 | 8 | def version_to_int(artifact) -> int: 9 | """Convert versions of the form vX to X. For example, v12 to 12.""" 10 | return int(artifact.version[1:]) 11 | 12 | 13 | def download_checkpoint( 14 | run_id: str, 15 | download_dir: Path, 16 | version: str | None, 17 | ) -> Path: 18 | api = wandb.Api() 19 | run = api.run(run_id) 20 | 21 | # Find the latest saved model checkpoint. 22 | chosen = None 23 | for artifact in run.logged_artifacts(): 24 | if artifact.type != "model" or artifact.state != "COMMITTED": 25 | continue 26 | 27 | # If no version is specified, use the latest. 28 | if version is None: 29 | if chosen is None or version_to_int(artifact) > version_to_int(chosen): 30 | chosen = artifact 31 | 32 | # If a specific version is specified, look for it. 33 | elif version == artifact.version: 34 | chosen = artifact 35 | break 36 | 37 | # Download the checkpoint. 38 | download_dir.mkdir(exist_ok=True, parents=True) 39 | root = download_dir / run_id 40 | chosen.download(root=root) 41 | return root / "model.ckpt" 42 | 43 | 44 | def update_checkpoint_path(path: str | None, cfg: WandbCfg) -> Path | None: 45 | if path is None: 46 | return None 47 | 48 | if not str(path).startswith("wandb://"): 49 | return Path(path) 50 | 51 | run_id, *version = path[len("wandb://") :].split(":") 52 | if len(version) == 0: 53 | version = None 54 | elif len(version) == 1: 55 | version = version[0] 56 | else: 57 | raise ValueError("Invalid version specifier!") 58 | 59 | project = cfg.project 60 | return download_checkpoint( 61 | f"{project}/{run_id}", 62 | Path("checkpoints"), 63 | version, 64 | ) 65 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/scripts/demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # inference GMFlow without refinement 4 | 5 | # sintel 6 | 7 | # only predict forward flow 8 | CUDA_VISIBLE_DEVICES=0 python main.py \ 9 | --inference_dir demo/sintel_market_1 \ 10 | --output_path output/gmflow-norefine-sintel_market_1 \ 11 | --resume pretrained/gmflow_sintel-0c07dcb3.pth 12 | 13 | # predict forward & backward flow 14 | CUDA_VISIBLE_DEVICES=0 python main.py \ 15 | --inference_dir demo/sintel_market_1 \ 16 | --output_path output/gmflow-norefine-sintel_market_1 \ 17 | --pred_bidir_flow \ 18 | --resume pretrained/gmflow_sintel-0c07dcb3.pth 19 | 20 | 21 | # predict forward & backward flow with forward-backward consistency check 22 | CUDA_VISIBLE_DEVICES=0 python main.py \ 23 | --inference_dir demo/sintel_market_1 \ 24 | --output_path output/gmflow-norefine-sintel_market_1 \ 25 | --pred_bidir_flow \ 26 |
--fwd_bwd_consistency_check \ 27 | --resume pretrained/gmflow_sintel-0c07dcb3.pth 28 | 29 | 30 | # davis 31 | 32 | CUDA_VISIBLE_DEVICES=0 python main.py \ 33 | --inference_dir demo/davis_breakdance-flare \ 34 | --output_path output/gmflow-norefine-davis_breakdance-flare \ 35 | --resume pretrained/gmflow_sintel-0c07dcb3.pth 36 | 37 | 38 | 39 | 40 | # inference GMFlow with refinement 41 | 42 | CUDA_VISIBLE_DEVICES=0 python main.py \ 43 | --inference_dir demo/davis_breakdance-flare \ 44 | --output_path output/gmflow-withrefine-davis_breakdance-flare \ 45 | --resume pretrained/gmflow_with_refine_sintel-3ed1cf48.pth \ 46 | --padding_factor 32 \ 47 | --upsample_factor 4 \ 48 | --num_scales 2 \ 49 | --attn_splits_list 2 8 \ 50 | --corr_radius_list -1 4 \ 51 | --prop_radius_list -1 1 52 | 53 | 54 | 55 | 56 | CUDA_VISIBLE_DEVICES=0 python main.py \ 57 | --inference_dir demo/sintel_test_clean_market_1 \ 58 | --output_path output/gmflow-norefine-sintel_test_clean_market_1 \ 59 | --pred_bidir_flow \ 60 | --fwd_bwd_consistency_check \ 61 | --resume pretrained/gmflow_sintel-0c07dcb3.pth 62 | 63 | 64 | -------------------------------------------------------------------------------- /egomono4d/loss/loss.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from dataclasses import dataclass 3 | from typing import Generic, TypeVar 4 | 5 | import torch 6 | import pdb 7 | from jaxtyping import Float 8 | from torch import Tensor, nn 9 | 10 | from ..dataset.types import Batch 11 | from ..flow import Flows 12 | from ..model.model import ModelOutput 13 | from ..tracking import Tracks 14 | 15 | 16 | @dataclass 17 | class LossCfgCommon: 18 | enable_after: int 19 | weight: float 20 | 21 | 22 | T = TypeVar("T", bound=LossCfgCommon) 23 | 24 | 25 | class Loss(nn.Module, ABC, Generic[T]): 26 | cfg: T 27 | 28 | def __init__(self, cfg: T) -> None: 29 | super().__init__() 30 | self.cfg = cfg 31 | 32 | def forward( 33 | self, 34 | batch: Batch, 35 | flows: Flows | None, 36 | tracks: list[Tracks] | None, 37 | model_output: ModelOutput, 38 | current_epoch: int, 39 | return_unweighted=False 40 | ) -> Float[Tensor, ""]: 41 | 42 | if current_epoch < self.cfg.enable_after: 43 | zr_loss = torch.tensor(0, dtype=torch.float32, device=batch.videos.device) 44 | if return_unweighted is True: 45 | return (zr_loss, zr_loss), None 46 | else: 47 | return zr_loss, None 48 | 49 | loss, loss_package = self.compute_unweighted_loss( 50 | batch, flows, tracks, model_output, current_epoch, return_unweighted 51 | ) 52 | if return_unweighted is True: 53 | return (self.cfg.weight * loss, 100 * loss), loss_package 54 | else: 55 | return self.cfg.weight * loss, loss_package 56 | 57 | @abstractmethod 58 | def compute_unweighted_loss( 59 | self, 60 | batch: Batch, 61 | flows: Flows, 62 | tracks: list[Tracks] | None, 63 | model_output: ModelOutput, 64 | global_step: int, 65 | ) -> tuple[Float[Tensor, ""], dict]: 66 | pass 67 | -------------------------------------------------------------------------------- /config/datagen_arctic.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataset: [arctic] 3 | - flow: gmflow 4 | - tracking: cotracker 5 | - model/backbone: nvds_unidepth 6 | - model/intrinsics: model 7 | - model/extrinsics: procrustes_flow 8 | - loss: [dynamic_area, cc, tracking_3d, flow_3d, shape] 9 | - visualizer: [summary] 10 | - _self_ 11 | 12 | base_cache_dir: ./cache 13 | save_dir: ./cache/models 14 | 15 | 
preprocess: 16 | resize_shape: [300, 400] # First resize the image into resize_shape. 17 | patch_size: 32 # Then conduct center_crop with w&h divided by patch_size equal to 0. 18 | num_frames: 4 19 | 20 | dataset: 21 | arctic: 22 | clip_frame: 40 23 | frame_sampler: pretrain_interval 24 | frame_max_interval: 4 25 | 26 | wandb: 27 | project: egomono4d 28 | mode: online 29 | name: placeholder 30 | group: null 31 | tags: null 32 | 33 | checkpoint: 34 | load: null 35 | 36 | trainer: 37 | val_check_interval: 0.1 38 | gradient_clip_val: 10.0 39 | max_epochs: 25 40 | accumulate_grad_batches: 1 41 | num_nodes: 1 42 | gpus: 8 43 | 44 | loss: 45 | dynamic_area: 46 | weight: 0.005 47 | enable_after: 0 48 | cc: 49 | weight: 1.0 50 | enable_after: 0 51 | tracking_3d: 52 | weight: 5.0 53 | enable_after: 0 54 | flow_3d: 55 | weight: 5.0 56 | enable_after: 0 57 | shape: 58 | weight: 4.0 59 | enable_after: 0 60 | dynamic_coef: 1.0 61 | decay_end_epochs: -1 62 | decay_low_weight: 1.0 63 | 64 | model_wrapper: 65 | lr: 5e-5 66 | cache_track: false 67 | 68 | model: 69 | use_correspondence_weights: true 70 | 71 | data_module: 72 | train: 73 | num_workers: 4 74 | persistent_workers: true 75 | batch_size: 2 # batch-size of per-gpu 76 | seed: 233 77 | val: 78 | num_workers: 4 79 | persistent_workers: true 80 | batch_size: 2 81 | seed: 233 82 | 83 | hydra: 84 | run: 85 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} -------------------------------------------------------------------------------- /config/datagen_fpha.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataset: [fpha] 3 | - flow: gmflow 4 | - tracking: cotracker 5 | - model/backbone: nvds_unidepth 6 | - model/intrinsics: model 7 | - model/extrinsics: procrustes_flow 8 | - loss: [dynamic_area, cc, tracking_3d, flow_3d, shape] 9 | - visualizer: [summary] 10 | - _self_ 11 | 12 | base_cache_dir: ./cache 13 | save_dir: ./cache/models 14 | 15 | preprocess: 16 | resize_shape: [300, 400] # First resize the image into resize_shape. 17 | patch_size: 32 # Then conduct center_crop with w&h divided by patch_size equal to 0. 
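  # e.g., with the values above, a 300x400 frame would be center-cropped to 288x384
  # (the largest height and width divisible by 32).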
18 | num_frames: 5 19 | 20 | 21 | dataset: 22 | fpha: 23 | clip_frame: 20 24 | frame_sampler: pretrain_interval 25 | frame_max_interval: 3 26 | 27 | wandb: 28 | project: egomono4d 29 | mode: online 30 | name: placeholder 31 | group: null 32 | tags: null 33 | 34 | checkpoint: 35 | load: null 36 | 37 | trainer: 38 | val_check_interval: 0.1 39 | gradient_clip_val: 10.0 40 | max_epochs: 25 41 | accumulate_grad_batches: 1 42 | num_nodes: 1 43 | gpus: 8 44 | 45 | loss: 46 | dynamic_area: 47 | weight: 0.005 48 | enable_after: 0 49 | cc: 50 | weight: 1.0 51 | enable_after: 0 52 | tracking_3d: 53 | weight: 5.0 54 | enable_after: 0 55 | flow_3d: 56 | weight: 5.0 57 | enable_after: 0 58 | shape: 59 | weight: 4.0 60 | enable_after: 0 61 | dynamic_coef: 1.0 62 | decay_end_epochs: -1 63 | decay_low_weight: 1.0 64 | 65 | model_wrapper: 66 | lr: 5e-5 67 | cache_track: false 68 | 69 | model: 70 | use_correspondence_weights: true 71 | 72 | data_module: 73 | train: 74 | num_workers: 4 75 | persistent_workers: true 76 | batch_size: 2 # batch-size of per-gpu 77 | seed: 233 78 | val: 79 | num_workers: 4 80 | persistent_workers: true 81 | batch_size: 2 82 | seed: 233 83 | 84 | hydra: 85 | run: 86 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} -------------------------------------------------------------------------------- /config/datagen_h2o.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataset: [h2o] 3 | - flow: gmflow 4 | - tracking: cotracker 5 | - model/backbone: nvds_unidepth 6 | - model/intrinsics: model 7 | - model/extrinsics: procrustes_flow 8 | - loss: [dynamic_area, cc, tracking_3d, flow_3d, shape] 9 | - visualizer: [summary] 10 | - _self_ 11 | 12 | base_cache_dir: ./cache 13 | save_dir: ./cache/models 14 | 15 | preprocess: 16 | resize_shape: [300, 400] # First resize the image into resize_shape. 17 | patch_size: 32 # Then conduct center_crop with w&h divided by patch_size equal to 0. 
18 | num_frames: 5 19 | 20 | dataset: 21 | h2o: 22 | clip_frame: 20 23 | frame_sampler: pretrain_interval 24 | frame_max_interval: 4 25 | 26 | 27 | wandb: 28 | project: egomono4d 29 | mode: online 30 | name: placeholder 31 | group: null 32 | tags: null 33 | 34 | checkpoint: 35 | load: null 36 | 37 | trainer: 38 | val_check_interval: 0.1 39 | gradient_clip_val: 10.0 40 | max_epochs: 25 41 | accumulate_grad_batches: 1 42 | num_nodes: 1 43 | gpus: 8 44 | 45 | loss: 46 | dynamic_area: 47 | weight: 0.005 48 | enable_after: 0 49 | cc: 50 | weight: 1.0 51 | enable_after: 0 52 | tracking_3d: 53 | weight: 5.0 54 | enable_after: 0 55 | flow_3d: 56 | weight: 5.0 57 | enable_after: 0 58 | shape: 59 | weight: 4.0 60 | enable_after: 0 61 | dynamic_coef: 1.0 62 | decay_end_epochs: -1 63 | decay_low_weight: 1.0 64 | 65 | model_wrapper: 66 | lr: 5e-5 67 | cache_track: false 68 | 69 | model: 70 | use_correspondence_weights: true 71 | 72 | data_module: 73 | train: 74 | num_workers: 4 75 | persistent_workers: true 76 | batch_size: 2 # batch-size of per-gpu 77 | seed: 233 78 | val: 79 | num_workers: 4 80 | persistent_workers: true 81 | batch_size: 2 82 | seed: 233 83 | 84 | hydra: 85 | run: 86 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} -------------------------------------------------------------------------------- /config/datagen_hoi4d.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataset: [hoi4d] 3 | - flow: gmflow 4 | - tracking: cotracker 5 | - model/backbone: nvds_unidepth 6 | - model/intrinsics: model 7 | - model/extrinsics: procrustes_flow 8 | - loss: [dynamic_area, cc, tracking_3d, flow_3d, shape] 9 | - visualizer: [summary] 10 | - _self_ 11 | 12 | base_cache_dir: ./cache 13 | save_dir: ./cache/models 14 | 15 | preprocess: 16 | resize_shape: [300, 400] # First resize the image into resize_shape. 17 | patch_size: 32 # Then conduct center_crop with w&h divided by patch_size equal to 0. 
18 | num_frames: 5 19 | 20 | # frame_sampler: egomono4d/frame_sampler/__init__.py 21 | 22 | dataset: 23 | hoi4d: 24 | frame_sampler: pretrain_interval 25 | frame_max_interval: 4 26 | 27 | wandb: 28 | project: egomono4d 29 | mode: online 30 | name: placeholder 31 | group: null 32 | tags: null 33 | 34 | checkpoint: 35 | load: null 36 | 37 | trainer: 38 | val_check_interval: 0.1 39 | gradient_clip_val: 10.0 40 | max_epochs: 25 41 | accumulate_grad_batches: 1 42 | num_nodes: 1 43 | gpus: 8 44 | 45 | loss: 46 | dynamic_area: 47 | weight: 0.005 48 | enable_after: 0 49 | cc: 50 | weight: 1.0 51 | enable_after: 0 52 | tracking_3d: 53 | weight: 5.0 54 | enable_after: 0 55 | flow_3d: 56 | weight: 5.0 57 | enable_after: 0 58 | shape: 59 | weight: 4.0 60 | enable_after: 0 61 | dynamic_coef: 1.0 62 | decay_end_epochs: -1 63 | decay_low_weight: 1.0 64 | 65 | model_wrapper: 66 | lr: 5e-5 67 | cache_track: false 68 | 69 | model: 70 | use_correspondence_weights: true 71 | 72 | data_module: 73 | train: 74 | num_workers: 4 75 | persistent_workers: true 76 | batch_size: 2 # batch-size of per-gpu 77 | seed: 233 78 | val: 79 | num_workers: 4 80 | persistent_workers: true 81 | batch_size: 2 82 | seed: 233 83 | 84 | hydra: 85 | run: 86 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} -------------------------------------------------------------------------------- /config/pretrain_eval_h2o.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataset: [h2o] 3 | - flow: gmflow 4 | - tracking: cotracker 5 | - model/backbone: nvds_unidepth 6 | - model/intrinsics: model 7 | - model/extrinsics: procrustes_flow 8 | - loss: [dynamic_area, cc, tracking_3d, flow_3d, shape] 9 | - visualizer: [summary] 10 | - _self_ 11 | 12 | base_cache_dir: ./cache 13 | save_dir: ./cache/models 14 | 15 | preprocess: 16 | resize_shape: [300, 400] # First resize the image into resize_shape. 17 | patch_size: 32 # Then conduct center_crop with w&h divided by patch_size equal to 0. 
18 | num_frames: 4 19 | 20 | dataset: 21 | h2o: 22 | clip_frame: 40 23 | 24 | wandb: 25 | project: egomono4d 26 | mode: online 27 | name: placeholder 28 | group: null 29 | tags: null 30 | 31 | checkpoint: 32 | load: ./cache/processed_datasets/egomono4d_result/2024-09-11/14-12-41/ptr_all_01_dp2/egomono4d/tdqluu5w/checkpoints/last.ckpt 33 | 34 | trainer: 35 | val_check_interval: 0.1 36 | gradient_clip_val: 10.0 37 | max_epochs: 25 38 | accumulate_grad_batches: 1 39 | num_nodes: 1 40 | gpus: 8 41 | 42 | loss: 43 | dynamic_area: 44 | weight: 0.005 45 | enable_after: 0 46 | cc: 47 | weight: 1.0 48 | enable_after: 0 49 | tracking_3d: 50 | weight: 5.0 51 | enable_after: 0 52 | flow_3d: 53 | weight: 5.0 54 | enable_after: 0 55 | shape: 56 | weight: 4.0 57 | enable_after: 0 58 | dynamic_coef: 1.0 59 | decay_end_epochs: -1 60 | decay_low_weight: 1.0 61 | 62 | model_wrapper: 63 | lr: 5e-5 64 | cache_track: false 65 | 66 | model: 67 | use_correspondence_weights: true 68 | 69 | data_module: 70 | train: 71 | num_workers: 4 72 | persistent_workers: true 73 | batch_size: 2 # batch-size of per-gpu 74 | seed: 233 75 | val: 76 | num_workers: 4 77 | persistent_workers: true 78 | batch_size: 2 79 | seed: 233 80 | 81 | hydra: 82 | run: 83 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} -------------------------------------------------------------------------------- /config/pretrain_eval_hoi4d.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataset: [hoi4d] 3 | - flow: gmflow 4 | - tracking: cotracker 5 | - model/backbone: nvds_unidepth 6 | - model/intrinsics: model 7 | - model/extrinsics: procrustes_flow 8 | - loss: [dynamic_area, cc, tracking_3d, flow_3d, shape] 9 | - visualizer: [summary] 10 | - _self_ 11 | 12 | base_cache_dir: ./cache 13 | save_dir: ./cache/models 14 | 15 | preprocess: 16 | resize_shape: [300, 400] # First resize the image into resize_shape. 17 | patch_size: 32 # Then conduct center_crop with w&h divided by patch_size equal to 0. 
18 | num_frames: 4 19 | 20 | dataset: 21 | hoi4d: 22 | clip_frame: 40 23 | 24 | wandb: 25 | project: egomono4d 26 | mode: online 27 | name: placeholder 28 | group: null 29 | tags: null 30 | 31 | checkpoint: 32 | load: ./cache/processed_datasets/egomono4d_result/2024-09-11/14-12-41/ptr_all_01_dp2/egomono4d/tdqluu5w/checkpoints/last.ckpt 33 | 34 | trainer: 35 | val_check_interval: 0.1 36 | gradient_clip_val: 10.0 37 | max_epochs: 25 38 | accumulate_grad_batches: 1 39 | num_nodes: 1 40 | gpus: 8 41 | 42 | loss: 43 | dynamic_area: 44 | weight: 0.005 45 | enable_after: 0 46 | cc: 47 | weight: 1.0 48 | enable_after: 0 49 | tracking_3d: 50 | weight: 5.0 51 | enable_after: 0 52 | flow_3d: 53 | weight: 5.0 54 | enable_after: 0 55 | shape: 56 | weight: 4.0 57 | enable_after: 0 58 | dynamic_coef: 1.0 59 | decay_end_epochs: -1 60 | decay_low_weight: 1.0 61 | 62 | model_wrapper: 63 | lr: 5e-5 64 | cache_track: false 65 | 66 | model: 67 | use_correspondence_weights: true 68 | 69 | data_module: 70 | train: 71 | num_workers: 4 72 | persistent_workers: true 73 | batch_size: 2 # batch-size of per-gpu 74 | seed: 233 75 | val: 76 | num_workers: 4 77 | persistent_workers: true 78 | batch_size: 2 79 | seed: 233 80 | 81 | hydra: 82 | run: 83 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} -------------------------------------------------------------------------------- /config/pretrain_eval_arctic.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataset: [arctic] 3 | - flow: gmflow 4 | - tracking: cotracker 5 | - model/backbone: nvds_unidepth 6 | - model/intrinsics: model 7 | - model/extrinsics: procrustes_flow 8 | - loss: [dynamic_area, cc, tracking_3d, flow_3d, shape] 9 | - visualizer: [summary] 10 | - _self_ 11 | 12 | base_cache_dir: ./cache 13 | save_dir: ./cache/models 14 | 15 | preprocess: 16 | resize_shape: [300, 400] # First resize the image into resize_shape. 17 | patch_size: 32 # Then conduct center_crop with w&h divided by patch_size equal to 0. 
18 | num_frames: 4 19 | 20 | dataset: 21 | arctic: 22 | clip_frame: 40 23 | 24 | wandb: 25 | project: egomono4d 26 | mode: online 27 | name: placeholder 28 | group: null 29 | tags: null 30 | 31 | checkpoint: 32 | load: ./cache/processed_datasets/egomono4d_result/2024-09-11/14-12-41/ptr_all_01_dp2/egomono4d/tdqluu5w/checkpoints/last.ckpt 33 | 34 | trainer: 35 | val_check_interval: 0.1 36 | gradient_clip_val: 10.0 37 | max_epochs: 25 38 | accumulate_grad_batches: 1 39 | num_nodes: 1 40 | gpus: 8 41 | 42 | loss: 43 | dynamic_area: 44 | weight: 0.005 45 | enable_after: 0 46 | cc: 47 | weight: 1.0 48 | enable_after: 0 49 | tracking_3d: 50 | weight: 5.0 51 | enable_after: 0 52 | flow_3d: 53 | weight: 5.0 54 | enable_after: 0 55 | shape: 56 | weight: 4.0 57 | enable_after: 0 58 | dynamic_coef: 1.0 59 | decay_end_epochs: -1 60 | decay_low_weight: 1.0 61 | 62 | model_wrapper: 63 | lr: 5e-5 64 | cache_track: false 65 | 66 | model: 67 | use_correspondence_weights: true 68 | 69 | data_module: 70 | train: 71 | num_workers: 4 72 | persistent_workers: true 73 | batch_size: 2 # batch-size of per-gpu 74 | seed: 233 75 | val: 76 | num_workers: 4 77 | persistent_workers: true 78 | batch_size: 2 79 | seed: 233 80 | 81 | hydra: 82 | run: 83 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} -------------------------------------------------------------------------------- /config/pretrain_eval_pov_surgery.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataset: [pov_surgery] 3 | - flow: gmflow 4 | - tracking: cotracker 5 | - model/backbone: nvds_unidepth 6 | - model/intrinsics: model 7 | - model/extrinsics: procrustes_flow 8 | - loss: [dynamic_area, cc, tracking_3d, flow_3d, shape] 9 | - visualizer: [summary] 10 | - _self_ 11 | 12 | base_cache_dir: ./cache 13 | save_dir: ./cache/models 14 | 15 | preprocess: 16 | resize_shape: [300, 400] # First resize the image into resize_shape. 17 | patch_size: 32 # Then conduct center_crop with w&h divided by patch_size equal to 0. 
18 | num_frames: 4 19 | 20 | dataset: 21 | pov_surgery: 22 | clip_frame: 40 23 | 24 | wandb: 25 | project: egomono4d 26 | mode: online 27 | name: placeholder 28 | group: null 29 | tags: null 30 | 31 | checkpoint: 32 | load: ./cache/processed_datasets/egomono4d_result/2024-09-11/14-12-41/ptr_all_01_dp2/egomono4d/tdqluu5w/checkpoints/last.ckpt 33 | 34 | trainer: 35 | val_check_interval: 0.1 36 | gradient_clip_val: 10.0 37 | max_epochs: 25 38 | accumulate_grad_batches: 1 39 | num_nodes: 1 40 | gpus: 8 41 | 42 | loss: 43 | dynamic_area: 44 | weight: 0.005 45 | enable_after: 0 46 | cc: 47 | weight: 1.0 48 | enable_after: 0 49 | tracking_3d: 50 | weight: 5.0 51 | enable_after: 0 52 | flow_3d: 53 | weight: 5.0 54 | enable_after: 0 55 | shape: 56 | weight: 4.0 57 | enable_after: 0 58 | dynamic_coef: 1.0 59 | decay_end_epochs: -1 60 | decay_low_weight: 1.0 61 | 62 | model_wrapper: 63 | lr: 5e-5 64 | cache_track: false 65 | 66 | model: 67 | use_correspondence_weights: true 68 | 69 | data_module: 70 | train: 71 | num_workers: 4 72 | persistent_workers: true 73 | batch_size: 2 # batch-size of per-gpu 74 | seed: 233 75 | val: 76 | num_workers: 4 77 | persistent_workers: true 78 | batch_size: 2 79 | seed: 233 80 | 81 | hydra: 82 | run: 83 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} -------------------------------------------------------------------------------- /config/datagen_pov_surgery.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataset: [pov_surgery] 3 | - flow: gmflow 4 | - tracking: cotracker 5 | - model/backbone: midas 6 | - model/intrinsics: softmin 7 | - model/extrinsics: procrustes_flow 8 | - loss: [dynamic_area, midas, tracking, flow, tracking_3d] # simple_arap 9 | - visualizer: [summary] 10 | - _self_ 11 | 12 | base_cache_dir: ./cache 13 | save_dir: ./cache/models 14 | 15 | preprocess: 16 | resize_shape: [300, 400] # First resize the image into resize_shape. 17 | patch_size: 32 # Then conduct center_crop with w&h divided by patch_size equal to 0. 
18 | num_frames: 4 19 | 20 | dataset: 21 | pov_surgery: 22 | clip_frame: 40 23 | frame_sampler: pretrain_interval 24 | frame_max_interval: 4 25 | 26 | wandb: 27 | project: egomono4d 28 | mode: online 29 | name: placeholder 30 | group: null 31 | tags: null 32 | 33 | checkpoint: 34 | load: null 35 | 36 | trainer: 37 | val_check_interval: 0.1 38 | # check_val_every_n_epoch: 1 39 | gradient_clip_val: 10.0 40 | max_epochs: 25 41 | accumulate_grad_batches: 1 42 | num_nodes: 1 43 | gpus: 8 44 | 45 | loss: 46 | dynamic_area: 47 | weight: 0.005 48 | enable_after: 0 49 | cc: 50 | weight: 1.0 51 | enable_after: 0 52 | tracking_3d: 53 | weight: 5.0 54 | enable_after: 0 55 | flow_3d: 56 | weight: 5.0 57 | enable_after: 0 58 | shape: 59 | weight: 4.0 60 | enable_after: 0 61 | dynamic_coef: 1.0 62 | decay_end_epochs: -1 63 | decay_low_weight: 1.0 64 | 65 | model_wrapper: 66 | lr: 5e-5 67 | cache_track: false 68 | 69 | model: 70 | use_correspondence_weights: true 71 | 72 | data_module: 73 | train: 74 | num_workers: 4 75 | persistent_workers: true 76 | batch_size: 2 # batch-size of per-gpu 77 | seed: 233 78 | val: 79 | num_workers: 4 80 | persistent_workers: true 81 | batch_size: 2 82 | seed: 233 83 | 84 | hydra: 85 | run: 86 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} -------------------------------------------------------------------------------- /egomono4d/model/extrinsics/extrinsics_procrustes_flow.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Literal 3 | 4 | import torch 5 | from jaxtyping import Float 6 | from torch import Tensor 7 | 8 | from ...dataset.types import Batch 9 | from ...flow.flow_predictor import Flows 10 | from ..backbone.backbone import BackboneOutput 11 | from ..projection import align_surfaces 12 | from .extrinsics import Extrinsics 13 | 14 | 15 | @dataclass 16 | class ExtrinsicsProcrustesFlowCfg: 17 | name: Literal["procrustes_flow"] 18 | num_points: int | None 19 | randomize_points: bool 20 | 21 | 22 | class ExtrinsicsProcrustesFlow(Extrinsics[ExtrinsicsProcrustesFlowCfg]): 23 | def forward( 24 | self, 25 | batch: Batch, 26 | flows: Flows, 27 | backbone_output: BackboneOutput, 28 | surfaces: Float[Tensor, "batch frame height width 3"], 29 | ) -> Float[Tensor, "batch frame 4 4"]: 30 | device = surfaces.device 31 | _, _, h, w, _ = surfaces.shape 32 | 33 | # Select the subset of points used for the alignment. 34 | if self.cfg.num_points is None: 35 | indices = torch.arange(h * w, dtype=torch.int64, device=device) 36 | elif self.cfg.randomize_points: 37 | indices = torch.randint( 38 | 0, 39 | h * w, 40 | (self.cfg.num_points,), 41 | dtype=torch.int64, 42 | device=device, 43 | ) 44 | else: 45 | indices = torch.linspace( 46 | 0, 47 | h * w - 1, 48 | self.cfg.num_points, 49 | dtype=torch.int64, 50 | device=device, 51 | ) 52 | 53 | # Align the depth maps using a Procrustes fit. 
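        # A sketch of the assumed behaviour of align_surfaces (defined in ..projection):
        # the backward flow pairs each pixel in frame t with a location in frame t-1, the
        # sampled `indices` select a subset of these correspondences, and a weighted rigid
        # (Procrustes) fit between the paired 3D points gives the relative pose of each
        # adjacent frame pair, which is then chained into per-frame extrinsics.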
54 | return align_surfaces( 55 | surfaces, # (B, F, H, W, 3) 56 | flows.backward, # (B, F-1, H, W, 2) 57 | backbone_output.weights, # (B, F-1, H, W) 58 | indices, # rand-index (H*W) 59 | ) 60 | # (B, F, 4, 4) -------------------------------------------------------------------------------- /config/datagen_epic_kitchen.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataset: [epic_kitchen] 3 | - flow: gmflow 4 | - tracking: cotracker 5 | - model/backbone: nvds_unidepth 6 | - model/intrinsics: model 7 | - model/extrinsics: procrustes_flow 8 | - loss: [dynamic_area, cc, tracking_3d, flow_3d, shape] 9 | - visualizer: [summary] 10 | - _self_ 11 | 12 | base_cache_dir: ./cache 13 | save_dir: ./cache/models 14 | 15 | preprocess: 16 | resize_shape: [300, 400] # First resize the image into resize_shape. 17 | patch_size: 32 # Then conduct center_crop with w&h divided by patch_size equal to 0. 18 | num_frames: 5 19 | 20 | dataset: 21 | epic_kitchen: 22 | clip_frame: 20 23 | mask_estimation: ['egohos', 'epipolar'] 24 | mask_binary_open_value: 5000 25 | frame_sampler: pretrain_interval 26 | frame_max_interval: 3 27 | 28 | wandb: 29 | project: egomono4d 30 | mode: online 31 | name: placeholder 32 | group: null 33 | tags: null 34 | 35 | checkpoint: 36 | load: null 37 | 38 | trainer: 39 | val_check_interval: 0.1 40 | gradient_clip_val: 10.0 41 | max_epochs: 25 42 | accumulate_grad_batches: 1 43 | num_nodes: 1 44 | gpus: 8 45 | 46 | loss: 47 | dynamic_area: 48 | weight: 0.005 49 | enable_after: 0 50 | cc: 51 | weight: 1.0 52 | enable_after: 0 53 | tracking_3d: 54 | weight: 5.0 55 | enable_after: 0 56 | flow_3d: 57 | weight: 5.0 58 | enable_after: 0 59 | shape: 60 | weight: 4.0 61 | enable_after: 0 62 | dynamic_coef: 1.0 63 | decay_end_epochs: -1 64 | decay_low_weight: 1.0 65 | 66 | model_wrapper: 67 | lr: 5e-5 68 | cache_track: false 69 | 70 | model: 71 | use_correspondence_weights: true 72 | 73 | data_module: 74 | train: 75 | num_workers: 4 76 | persistent_workers: true 77 | batch_size: 2 # batch-size of per-gpu 78 | seed: 233 79 | val: 80 | num_workers: 4 81 | persistent_workers: true 82 | batch_size: 2 83 | seed: 233 84 | 85 | hydra: 86 | run: 87 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/gmflow/position.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | # https://github.com/facebookresearch/detr/blob/main/models/position_encoding.py 3 | 4 | import torch 5 | import torch.nn as nn 6 | import math 7 | 8 | 9 | class PositionEmbeddingSine(nn.Module): 10 | """ 11 | This is a more standard version of the position embedding, very similar to the one 12 | used by the Attention is all you need paper, generalized to work on images. 
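    In short: row/column indices are normalized to (0, 2*pi] and each coordinate p is
    encoded as interleaved sin(p / T^(2i/d)) and cos(p / T^(2i/d)) features with
    temperature T = 10000 and d = num_pos_feats; the y- and x-encodings are concatenated,
    giving 2 * num_pos_feats output channels.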
13 | """ 14 | 15 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=True, scale=None): 16 | super().__init__() 17 | self.num_pos_feats = num_pos_feats 18 | self.temperature = temperature 19 | self.normalize = normalize 20 | if scale is not None and normalize is False: 21 | raise ValueError("normalize should be True if scale is passed") 22 | if scale is None: 23 | scale = 2 * math.pi 24 | self.scale = scale 25 | 26 | def forward(self, x): 27 | # x = tensor_list.tensors # [B, C, H, W] 28 | # mask = tensor_list.mask # [B, H, W], input with padding, valid as 0 29 | b, c, h, w = x.size() 30 | mask = torch.ones((b, h, w), device=x.device) # [B, H, W] 31 | y_embed = mask.cumsum(1, dtype=torch.float32) 32 | x_embed = mask.cumsum(2, dtype=torch.float32) 33 | if self.normalize: 34 | eps = 1e-6 35 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 36 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 37 | 38 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 39 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 40 | 41 | pos_x = x_embed[:, :, :, None] / dim_t 42 | pos_y = y_embed[:, :, :, None] / dim_t 43 | pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3) 44 | pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3) 45 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 46 | return pos 47 | -------------------------------------------------------------------------------- /egomono4d/loss/loss_shape.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Literal 3 | import torch 4 | import pdb 5 | 6 | from jaxtyping import Float 7 | from torch import Tensor 8 | import torch.nn.functional as F 9 | from torchvision.utils import save_image 10 | 11 | from ..dataset.types import Batch 12 | from ..flow import Flows 13 | from ..model.model import ModelOutput 14 | from ..tracking import Tracks 15 | from .loss import Loss, LossCfgCommon 16 | 17 | from ..model.procrustes import align_scaled_rigid 18 | 19 | @dataclass 20 | class LossShapeCfg(LossCfgCommon): 21 | name: Literal["shape"] 22 | dynamic_coef: float 23 | decay_end_epochs: int 24 | decay_low_weight: float 25 | 26 | 27 | def loss_shape_func(ref_pcds, surfaces, flys, loss_func, return_val=False, inf_mode=False, cfg=None): 28 | b, f, h, w, _ = ref_pcds.shape 29 | device = ref_pcds.device 30 | 31 | surfaces = surfaces.reshape(b*f, h*w, 3) 32 | pcd_r = ref_pcds.reshape(b*f, h*w, 3) 33 | 34 | # we keep all points the same weight to conduct constraint on shape rather than points. 
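    # Concretely, only the validity mask `flys` acts as a per-point weight (no extra
    # correspondence weighting), and the weighted scaled-rigid alignment below removes any
    # global pose/scale offset, so the residual measures shape error only. Reading
    # align_scaled_rigid as an Umeyama-style similarity fit is an assumption based on its
    # name and how it is used here.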
35 | weights = flys.reshape(b*f,h*w) 36 | transform, scale = align_scaled_rigid(surfaces, pcd_r, weights=weights) 37 | 38 | surfaces_transformed = torch.matmul(transform[..., :3,:3], surfaces.mT).mT + transform[..., None, :3, 3] 39 | 40 | loss_map = loss_func(surfaces_transformed, pcd_r).sum(dim=-1) * weights 41 | loss_map = loss_map.reshape(b, f, h, w) 42 | loss = loss_map.sum() / weights.sum() 43 | 44 | return loss, {"shape": loss} 45 | 46 | 47 | class LossShape(Loss[LossShapeCfg]): 48 | def __init__(self, cfg: LossShapeCfg) -> None: 49 | super().__init__(cfg) 50 | self.loss = torch.nn.MSELoss(reduction="none") 51 | 52 | def compute_unweighted_loss( 53 | self, 54 | batch: Batch, 55 | flows: Flows, 56 | tracks: list[Tracks] | None, 57 | model_output: ModelOutput, 58 | current_epoch: int, 59 | return_val: bool, 60 | ) -> tuple[Float[Tensor, ""], dict]: 61 | return loss_shape_func(batch.pcds, model_output.surfaces, batch.flys, self.loss, return_val=return_val, cfg=self.cfg) 62 | 63 | -------------------------------------------------------------------------------- /egomono4d/eval/eval_extrinsic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pdb 3 | import numpy as np 4 | from evo.core.trajectory import PoseTrajectory3D 5 | from evo.core.transformations import quaternion_from_matrix 6 | from evo.core import metrics 7 | from evo.tools import file_interface 8 | import evo.main_ape as main_ape 9 | import evo.main_rpe as main_rpe 10 | 11 | 12 | def tensor_to_trajectory(tensor): 13 | # Tensor(seq_length, 4, 4) --> PoseTrajectory3D 14 | seq_length = len(tensor) 15 | timestamps = np.arange(seq_length) 16 | return PoseTrajectory3D(poses_se3=list(tensor.cpu().numpy()), timestamps=timestamps) 17 | 18 | 19 | def eval_extrinsic_conductor(pred_extrinsic, gt_extrinsic, correct_scale=True): # for mono-slam, correct_scale=True 20 | 21 | ate_list, rpe_trans_list, rpe_rot_list = [], [], [] 22 | for i in range(len(pred_extrinsic)): 23 | 24 | pred = pred_extrinsic[i] 25 | gt = gt_extrinsic[i] 26 | traj_est = tensor_to_trajectory(pred) 27 | traj_ref = tensor_to_trajectory(gt) 28 | 29 | ate_result = main_ape.ape(traj_ref, traj_est, est_name='ate', 30 | pose_relation=metrics.PoseRelation.translation_part, align=True, correct_scale=correct_scale) 31 | 32 | rpe_trans_result = main_rpe.rpe(traj_ref, traj_est, est_name='rpe_t', delta=1.0, delta_unit=metrics.Unit.frames, 33 | pose_relation=metrics.PoseRelation.translation_part, align=True, rel_delta_tol=0.1, correct_scale=correct_scale) 34 | 35 | rpe_rot_result = main_rpe.rpe(traj_ref, traj_est, est_name='rpe_r', delta=1.0, delta_unit=metrics.Unit.frames, 36 | pose_relation=metrics.PoseRelation.rotation_angle_deg, align=True, rel_delta_tol=0.1, correct_scale=correct_scale) 37 | ate_list.append(ate_result.stats["mean"]) 38 | rpe_trans_list.append(rpe_trans_result.stats["mean"]) 39 | rpe_rot_list.append(rpe_rot_result.stats["mean"]) 40 | 41 | return { 42 | 'CAM_ATE(mm)': 1000.0 * sum(ate_list) / len(ate_list), 43 | 'CAM_RPE_Trans(mm)': 1000.0 * sum(rpe_trans_list) / len(rpe_trans_list), 44 | 'CAM_RPE_Rot(deg)': sum(rpe_rot_list) / len(rpe_rot_list) 45 | } 46 | 47 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | class InputPadder: 6 | """ Pads images such that dimensions are divisible by 8 """ 
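    # Note: the actual divisor is the `padding_factor` argument (default 8); the
    # GMFlow-with-refinement commands in the repo scripts pass --padding_factor 32.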
7 | 8 | def __init__(self, dims, mode='sintel', padding_factor=8): 9 | self.ht, self.wd = dims[-2:] 10 | pad_ht = (((self.ht // padding_factor) + 1) * padding_factor - self.ht) % padding_factor 11 | pad_wd = (((self.wd // padding_factor) + 1) * padding_factor - self.wd) % padding_factor 12 | if mode == 'sintel': 13 | self._pad = [pad_wd // 2, pad_wd - pad_wd // 2, pad_ht // 2, pad_ht - pad_ht // 2] 14 | else: 15 | self._pad = [pad_wd // 2, pad_wd - pad_wd // 2, 0, pad_ht] 16 | 17 | def pad(self, *inputs): 18 | return [F.pad(x, self._pad, mode='replicate') for x in inputs] 19 | 20 | def unpad(self, x): 21 | ht, wd = x.shape[-2:] 22 | c = [self._pad[2], ht - self._pad[3], self._pad[0], wd - self._pad[1]] 23 | return x[..., c[0]:c[1], c[2]:c[3]] 24 | 25 | 26 | def coords_grid(batch, ht, wd, normalize=False): 27 | if normalize: # [-1, 1] 28 | coords = torch.meshgrid(2 * torch.arange(ht) / (ht - 1) - 1, 29 | 2 * torch.arange(wd) / (wd - 1) - 1) 30 | else: 31 | coords = torch.meshgrid(torch.arange(ht), torch.arange(wd)) 32 | coords = torch.stack(coords[::-1], dim=0).float() 33 | return coords[None].repeat(batch, 1, 1, 1) # [B, 2, H, W] 34 | 35 | 36 | def compute_out_of_boundary_mask(flow): 37 | # flow: [B, 2, H, W] 38 | assert flow.dim() == 4 and flow.size(1) == 2 39 | b, _, h, w = flow.shape 40 | init_coords = coords_grid(b, h, w).to(flow.device) 41 | corres = init_coords + flow # [B, 2, H, W] 42 | 43 | max_w = w - 1 44 | max_h = h - 1 45 | 46 | valid_mask = (corres[:, 0] >= 0) & (corres[:, 0] <= max_w) & (corres[:, 1] >= 0) & (corres[:, 1] <= max_h) 47 | 48 | # in case very large flow 49 | flow_mask = (flow[:, 0].abs() <= max_w) & (flow[:, 1].abs() <= max_h) 50 | 51 | valid_mask = valid_mask & flow_mask 52 | 53 | return valid_mask # [B, H, W] 54 | 55 | 56 | def count_parameters(model): 57 | num = sum(p.numel() for p in model.parameters() if p.requires_grad) 58 | return num 59 | -------------------------------------------------------------------------------- /egomono4d/visualization/drawing/points.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from einops import repeat 5 | from jaxtyping import Float 6 | from torch import Tensor 7 | 8 | from .coordinate_conversion import generate_conversions 9 | from .rendering import render_over_image 10 | from .types import Pair, Scalar, Vector, sanitize_scalar, sanitize_vector 11 | 12 | 13 | def draw_points( 14 | image: Float[Tensor, "3 height width"] | Float[Tensor, "4 height width"], 15 | points: Vector, 16 | color: Vector = [1, 1, 1], 17 | radius: Scalar = 1, 18 | inner_radius: Scalar = 0, 19 | num_msaa_passes: int = 1, 20 | x_range: Optional[Pair] = None, 21 | y_range: Optional[Pair] = None, 22 | ) -> Float[Tensor, "3 height width"] | Float[Tensor, "4 height width"]: 23 | device = image.device 24 | points = sanitize_vector(points, 2, device) 25 | color = sanitize_vector(color, 3, device) 26 | radius = sanitize_scalar(radius, device) 27 | inner_radius = sanitize_scalar(inner_radius, device) 28 | (num_points,) = torch.broadcast_shapes( 29 | points.shape[0], 30 | color.shape[0], 31 | radius.shape, 32 | inner_radius.shape, 33 | ) 34 | 35 | # Convert world-space points to pixel space. 
36 | _, h, w = image.shape 37 | world_to_pixel, _ = generate_conversions((h, w), device, x_range, y_range) 38 | points = world_to_pixel(points) 39 | 40 | def color_function( 41 | xy: Float[Tensor, "point 2"], 42 | ) -> Float[Tensor, "point 4"]: 43 | # Define a vector between the start and end points. 44 | delta = xy[:, None] - points[None] 45 | delta_norm = delta.norm(dim=-1) 46 | mask = (delta_norm >= inner_radius[None]) & (delta_norm <= radius[None]) 47 | 48 | # Determine the sample's color. 49 | selectable_color = color.broadcast_to((num_points, 3)) 50 | arrangement = mask * torch.arange(num_points, device=device) 51 | top_color = selectable_color.gather( 52 | dim=0, 53 | index=repeat(arrangement.argmax(dim=1), "s -> s c", c=3), 54 | ) 55 | rgba = torch.cat((top_color, mask.any(dim=1).float()[:, None]), dim=-1) 56 | 57 | return rgba 58 | 59 | return render_over_image(image, color_function, device, num_passes=num_msaa_passes) 60 | -------------------------------------------------------------------------------- /egomono4d/datagen.py: -------------------------------------------------------------------------------- 1 | import hydra 2 | import torch 3 | from jaxtyping import install_import_hook 4 | from lightning import Trainer 5 | import pdb 6 | from lightning.pytorch.plugins.environments import SLURMEnvironment 7 | from omegaconf import DictConfig 8 | 9 | # Configure beartype and jaxtyping. 10 | with install_import_hook( 11 | ("flowmap",), 12 | ("beartype", "beartype"), 13 | ): 14 | from .config.common import get_typed_root_config 15 | from .config.pretrain import PretrainCfg 16 | from .dataset.data_module_pretrain import DataModulePretrain 17 | from .loss import get_losses 18 | from .misc.common_training_setup import run_common_training_setup 19 | from .model.model import Model 20 | from .model.model_wrapper_pretrain import ModelWrapperPretrain 21 | from .visualization import get_visualizers 22 | 23 | from .dataset import get_dataset 24 | 25 | @hydra.main( 26 | version_base=None, 27 | config_path="../config", 28 | config_name="datagen_pov_surgery", 29 | ) 30 | def pretrain(cfg_dict: DictConfig) -> None: 31 | cfg = get_typed_root_config(cfg_dict, PretrainCfg) 32 | cfg.flow.cache_dir = cfg.base_cache_dir 33 | loss_name_list = [cfg_item.name for cfg_item in cfg.loss] 34 | 35 | for dataset_cfg in cfg.dataset: 36 | dataset_cfg.resize_shape = cfg.preprocess.resize_shape 37 | dataset_cfg.patch_size = cfg.preprocess.patch_size 38 | dataset_cfg.num_frames = cfg.preprocess.num_frames 39 | dataset_cfg.cache_dir = cfg.base_cache_dir 40 | dataset_cfg.use_consistency_loss = ('cc' in loss_name_list) 41 | if hasattr(dataset_cfg, "mask_flow_model"): 42 | dataset_cfg.mask_flow_model = cfg.flow 43 | 44 | cfg.trainer.gpus = 1 45 | 46 | dataset_train = get_dataset(cfg.dataset, 'train', debug=False, global_rank=0, world_size=1) 47 | dataset_val = get_dataset(cfg.dataset, 'val', debug=False, global_rank=0, world_size=1) 48 | dataset_test = get_dataset(cfg.dataset, 'test', debug=False, global_rank=0, world_size=1) 49 | train = iter(dataset_train).__next__() 50 | val = iter(dataset_val).__next__() 51 | test = iter(dataset_test).__next__() 52 | pdb.set_trace() 53 | 54 | 55 | if __name__ == "__main__": 56 | pretrain() 57 | 58 | # CUDA_VISIBLE_DEVICES=0,1 python -m egomono4d.data -------------------------------------------------------------------------------- /egomono4d/loss/loss_dynamic_area.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 
| from typing import Literal 3 | import torch 4 | import pdb 5 | from einops import einsum, rearrange 6 | 7 | from jaxtyping import Float 8 | from torch import Tensor 9 | import torch.nn.functional as F 10 | from torchvision.utils import save_image 11 | 12 | from ..dataset.types import Batch 13 | from ..flow import Flows 14 | from ..model.model import ModelOutput 15 | from ..model.projection import sample_image_grid 16 | 17 | from ..tracking import Tracks 18 | from .loss import Loss, LossCfgCommon 19 | from .mapping import MappingCfg, get_mapping 20 | 21 | earlier = lambda x: x[:, :-1] # noqa 22 | later = lambda x: x[:, 1:] # noqa 23 | 24 | 25 | @dataclass 26 | class LossDynamicAreaCfg(LossCfgCommon): 27 | name: Literal["dynamic_area"] 28 | 29 | 30 | class LossDynamicArea(Loss[LossDynamicAreaCfg]): 31 | def __init__(self, cfg: LossDynamicAreaCfg) -> None: 32 | super().__init__(cfg) 33 | self.bce_loss = torch.nn.BCELoss(reduction="none") 34 | 35 | def compute_unweighted_loss( 36 | self, 37 | batch: Batch, 38 | flows: Flows, 39 | tracks: list[Tracks] | None, 40 | model_output: ModelOutput, 41 | current_epoch: int, 42 | return_val: bool 43 | ) -> tuple[Float[Tensor, ""], dict]: 44 | 45 | surfaces = model_output.surfaces 46 | device = surfaces.device 47 | b, f, h, w, _ = surfaces.shape 48 | xy, _ = sample_image_grid((h, w), device=device) 49 | 50 | later_mask = later(batch.masks) # (b, f-1, h, w) 51 | b_xy_earlier = rearrange(xy + flows.backward, "b f h w xy -> (b f) h w xy") 52 | earlier_mask = F.grid_sample( 53 | rearrange(earlier(batch.masks), "b f h w -> (b f) () h w"), 54 | b_xy_earlier * 2 - 1, 55 | align_corners=True, 56 | mode='bilinear', 57 | padding_mode="zeros" 58 | ) 59 | earlier_mask = rearrange(earlier_mask, "(b f) () h w -> b f h w", b=b, f=f-1) 60 | gt_mask = later_mask * earlier_mask 61 | 62 | loss = self.bce_loss(model_output.backward_correspondence_weights, gt_mask) 63 | valid = h * w * (f-1) * b 64 | 65 | 66 | loss = loss.sum() / (valid or 1) 67 | return loss, {"dynamic_area": loss} -------------------------------------------------------------------------------- /egomono4d/misc/image_io.py: -------------------------------------------------------------------------------- 1 | import io 2 | from pathlib import Path 3 | from typing import Union 4 | 5 | import numpy as np 6 | import torch 7 | import torchvision.transforms as tf 8 | from einops import rearrange, repeat 9 | from jaxtyping import Float, UInt8 10 | from matplotlib.figure import Figure 11 | from PIL import Image 12 | from torch import Tensor 13 | 14 | FloatImage = Union[ 15 | Float[Tensor, "height width"], 16 | Float[Tensor, "channel height width"], 17 | Float[Tensor, "batch channel height width"], 18 | ] 19 | 20 | 21 | def fig_to_image( 22 | fig: Figure, 23 | dpi: int = 100, 24 | device: torch.device = torch.device("cpu"), 25 | ) -> Float[Tensor, "3 height width"]: 26 | buffer = io.BytesIO() 27 | fig.savefig(buffer, format="raw", dpi=dpi) 28 | buffer.seek(0) 29 | data = np.frombuffer(buffer.getvalue(), dtype=np.uint8) 30 | h = int(fig.bbox.bounds[3]) 31 | w = int(fig.bbox.bounds[2]) 32 | data = rearrange(data, "(h w c) -> c h w", h=h, w=w, c=4) 33 | buffer.close() 34 | return (torch.tensor(data, device=device, dtype=torch.float32) / 255)[:3] 35 | 36 | 37 | def prep_image(image: FloatImage) -> UInt8[np.ndarray, "height width channel"]: 38 | # Handle batched images. 39 | if image.ndim == 4: 40 | image = rearrange(image, "b c h w -> c h (b w)") 41 | 42 | # Handle single-channel images. 
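    # A 2D (height, width) tensor first gains a channel axis; a true single-channel image
    # is then repeated to three identical channels so it can be saved as RGB.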
43 | if image.ndim == 2: 44 | image = rearrange(image, "h w -> () h w") 45 | 46 | # Ensure that there are 3 or 4 channels. 47 | channel, _, _ = image.shape 48 | if channel == 1: 49 | image = repeat(image, "() h w -> c h w", c=3) 50 | assert image.shape[0] in (3, 4) 51 | 52 | image = (image.detach().clip(min=0, max=1) * 255).type(torch.uint8) 53 | return rearrange(image, "c h w -> h w c").cpu().numpy() 54 | 55 | 56 | def save_image( 57 | image: FloatImage, 58 | path: Union[Path, str], 59 | ) -> None: 60 | """Save an image. Assumed to be in range 0-1.""" 61 | 62 | # Create the parent directory if it doesn't already exist. 63 | path = Path(path) 64 | path.parent.mkdir(exist_ok=True, parents=True) 65 | 66 | # Save the image. 67 | Image.fromarray(prep_image(image)).save(path) 68 | 69 | 70 | def load_image( 71 | path: Union[Path, str], 72 | ) -> Float[Tensor, "3 height width"]: 73 | return tf.ToTensor()(Image.open(path))[:3] 74 | -------------------------------------------------------------------------------- /egomono4d/misc/common_training_setup.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import hydra 4 | import torch 5 | import wandb 6 | import os 7 | from lightning.pytorch.callbacks import Callback, LearningRateMonitor, ModelCheckpoint 8 | from lightning.pytorch.loggers import Logger 9 | from lightning.pytorch.loggers.wandb import WandbLogger 10 | from omegaconf import DictConfig, OmegaConf 11 | 12 | from ..config.common import CommonCfg 13 | from .local_logger import LOG_PATH, LocalLogger 14 | from .wandb_tools import update_checkpoint_path 15 | 16 | 17 | def run_common_training_setup( 18 | cfg: CommonCfg, 19 | cfg_dict: DictConfig 20 | ) -> tuple[list[Callback], Logger, Path | None, Path]: 21 | torch.set_float32_matmul_precision("highest") 22 | 23 | # Set up callbacks. 24 | callbacks = [ 25 | LearningRateMonitor("step", True), 26 | ModelCheckpoint( 27 | monitor="val/loss/total_loss", 28 | mode="min", 29 | dirpath=(LOG_PATH / "checkpoints") if cfg.wandb.mode == "disabled" else None, 30 | save_top_k=1, 31 | save_last=True, 32 | filename="best-{epoch}-{step}", 33 | ) 34 | ] 35 | 36 | # Set up logging. 37 | if cfg.wandb.mode == "disabled": 38 | logger = LocalLogger() 39 | output_dir = LOG_PATH 40 | os.makedirs(output_dir, exist_ok=True) 41 | else: 42 | output_dir = Path( 43 | hydra.core.hydra_config.HydraConfig.get()["runtime"]["output_dir"] 44 | ) 45 | output_dir = output_dir / cfg.wandb.name 46 | os.makedirs(output_dir, exist_ok=True) 47 | logger = WandbLogger( 48 | project=cfg.wandb.project, 49 | name=cfg.wandb.name, 50 | mode=cfg.wandb.mode, 51 | group=cfg.wandb.group, 52 | tags=cfg.wandb.tags, 53 | config=OmegaConf.to_container(cfg_dict), 54 | log_model=False, # disabled artifact logging for storage saving 55 | save_dir=output_dir, 56 | ) 57 | 58 | # Log code to wandb if rank is 0. On rank != 0, wandb.run is None. 59 | if wandb.run is not None: 60 | wandb.run.log_code("egomono4d") 61 | 62 | # Prepare the checkpoint for loading. 
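    # update_checkpoint_path is assumed to resolve wandb:// references (see
    # CheckpointCfg.load in config/common.py) to a local checkpoint file and to pass
    # ordinary filesystem paths through unchanged.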
63 | checkpoint_path = update_checkpoint_path(cfg.checkpoint.load, cfg.wandb) 64 | 65 | return callbacks, logger, checkpoint_path, output_dir 66 | -------------------------------------------------------------------------------- /egomono4d/model/backbone/modules/transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from einops import rearrange, repeat 5 | from einops.layers.torch import Rearrange 6 | 7 | 8 | class FeedForward(nn.Module): 9 | def __init__(self, dim, hidden_dim, dropout = 0.): 10 | super().__init__() 11 | self.net = nn.Sequential( 12 | nn.LayerNorm(dim), 13 | nn.Linear(dim, hidden_dim), 14 | nn.GELU(), 15 | nn.Dropout(dropout), 16 | nn.Linear(hidden_dim, dim), 17 | nn.Dropout(dropout) 18 | ) 19 | def forward(self, x): 20 | return self.net(x) 21 | 22 | 23 | class Attention(nn.Module): 24 | def __init__(self, dim, heads = 4, dim_head = 64, dropout = 0.): 25 | super().__init__() 26 | inner_dim = dim_head * heads 27 | project_out = not (heads == 1 and dim_head == dim) 28 | 29 | self.heads = heads 30 | self.scale = dim_head ** -0.5 31 | 32 | self.norm = nn.LayerNorm(dim) 33 | self.attend = nn.Softmax(dim = -1) 34 | self.dropout = nn.Dropout(dropout) 35 | 36 | self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False) 37 | 38 | self.to_out = nn.Sequential( 39 | nn.Linear(inner_dim, dim), 40 | nn.Dropout(dropout) 41 | ) if project_out else nn.Identity() 42 | 43 | def forward(self, x): 44 | x = self.norm(x) 45 | qkv = self.to_qkv(x).chunk(3, dim = -1) 46 | q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = self.heads), qkv) 47 | 48 | dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale 49 | 50 | attn = self.attend(dots) 51 | attn = self.dropout(attn) 52 | 53 | out = torch.matmul(attn, v) 54 | out = rearrange(out, 'b h n d -> b n (h d)') 55 | return self.to_out(out) 56 | 57 | 58 | class Transformer(nn.Module): 59 | def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0.): 60 | super().__init__() 61 | self.layers = nn.ModuleList([]) 62 | for _ in range(depth): 63 | self.layers.append(nn.ModuleList([ 64 | Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout), 65 | FeedForward(dim, mlp_dim, dropout = dropout) 66 | ])) 67 | def forward(self, x): 68 | for attn, ff in self.layers: 69 | x = attn(x) + x 70 | x = ff(x) + x 71 | return x 72 | -------------------------------------------------------------------------------- /egomono4d/frame_sampler/frame_sampler_pretrain.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | # from typing import Literal 3 | from typing_extensions import Literal 4 | 5 | import torch 6 | from jaxtyping import Int64 7 | from torch import Tensor 8 | import random 9 | 10 | from .frame_sampler import FrameSampler 11 | 12 | 13 | class FrameSamplerPretrainNeighbor(FrameSampler): 14 | def sample( 15 | self, 16 | num_frames_in_video: int, 17 | device: torch.device, 18 | ) -> Int64[Tensor, " frame"]: 19 | # If the video doesn't have enough frames, just repeat the last frame. 20 | if num_frames_in_video < self.num_frames: 21 | indices = torch.arange(self.num_frames, device=device) 22 | indices[indices >= num_frames_in_video] = num_frames_in_video - 1 23 | return indices 24 | 25 | # If the video has enough frames, pick a random starting point. 
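        # Random window starts are used only for training (as data augmentation);
        # validation and test always start at frame 0 so sampled clips are deterministic.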
26 | if self.stage == 'train': 27 | start = torch.randint(0, num_frames_in_video - self.num_frames + 1, tuple()) 28 | else: 29 | start = 0 30 | return torch.arange(start, start + self.num_frames, device=device) 31 | 32 | 33 | class FrameSamplerPretrainInterval(FrameSampler): 34 | def sample( 35 | self, 36 | num_frames_in_video: int, 37 | device: torch.device, 38 | max_interval: int=1 39 | ) -> Int64[Tensor, " frame"]: 40 | # If the video doesn't have enough frames, just repeat the last frame. 41 | if num_frames_in_video < self.num_frames: 42 | indices = torch.arange(self.num_frames, device=device) 43 | indices[indices >= num_frames_in_video] = num_frames_in_video - 1 44 | return indices 45 | 46 | if num_frames_in_video - 1 < max_interval * (self.num_frames-1): 47 | max_interval = (num_frames_in_video - 1) // (self.num_frames-1) 48 | 49 | if self.stage == 'train': 50 | interval = random.randint(1, max_interval) 51 | start = torch.randint(0, (num_frames_in_video-1)-interval*(self.num_frames-1), tuple()) 52 | else: 53 | interval = (max_interval + 1) // 2 # we test the middle state as representative performance (between easiest and hardest). 54 | start = ((num_frames_in_video-1)-interval*(self.num_frames-1)) // 2 # fixed it to eliminate uncertainty. 55 | # print(f"interval: {interval}") 56 | res_idx = torch.tensor([start+i*interval for i in range(self.num_frames)], device=device) 57 | # print(f"max_interval={max_interval}, interval={interval}, res={res_idx}") 58 | return res_idx -------------------------------------------------------------------------------- /egomono4d/config/common.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | import os 3 | import pdb 4 | from typing import Type, TypeVar, Optional, List, Union # , Literal 5 | 6 | from omegaconf import DictConfig 7 | 8 | from ..dataset import DatasetCfg 9 | from ..misc.cropping import CroppingCfg 10 | from .tools import get_typed_config, separate_multiple_defaults 11 | 12 | 13 | try: 14 | EVAL = os.environ['EVAL_MODE'] 15 | except: 16 | EVAL = 'False' 17 | try: 18 | INFER = os.environ['INFER_MODE'] 19 | except: 20 | INFER = 'False' 21 | if (EVAL not in ['True']): 22 | from ..flow import FlowPredictorCfg 23 | from ..loss import LossCfg 24 | from ..model.model import ModelCfg 25 | from ..visualization import VisualizerCfg 26 | from ..tracking import TrackPredictorCfg 27 | print("Install Training Cfg.") 28 | else: 29 | FlowPredictorCfg, TrackPredictorCfg = None, None 30 | LossCfg, ModelCfg, VisualizerCfg = None, None, None 31 | 32 | @dataclass 33 | class WandbCfg: 34 | project: str = "egomono4d" 35 | mode: str = "disabled" 36 | name: Optional[str] = None 37 | group: Optional[str] = None 38 | tags: Optional[List[str]] = None 39 | 40 | 41 | @dataclass 42 | class CheckpointCfg: 43 | load: Optional[str] = None # str instead of Path, since it could be wandb://... 
44 | 45 | 46 | @dataclass 47 | class TrainerCfg: 48 | val_check_interval: Union[int, float] = 1.0 49 | # check_val_every_n_epoch: int 50 | gradient_clip_val: float = 10.0 51 | max_steps: Optional[int] = None 52 | max_epochs: Optional[int] = None 53 | accumulate_grad_batches: Optional[int] = None 54 | num_nodes: int = 1 55 | gpus: int = 1 56 | 57 | 58 | @dataclass 59 | class CommonCfg: 60 | base_cache_dir: str = None 61 | save_dir: str = None 62 | data_ratio: float = None 63 | use_gt_depth: bool = False 64 | wandb: WandbCfg = None 65 | checkpoint: CheckpointCfg = None 66 | trainer: TrainerCfg = None 67 | flow: Optional[FlowPredictorCfg] = None 68 | tracking: Optional[TrackPredictorCfg] = None 69 | dataset: List[DatasetCfg] = None 70 | model: ModelCfg = None 71 | loss: List[LossCfg] = None 72 | visualizer: List[VisualizerCfg] = None 73 | cropping: Optional[CroppingCfg] = None 74 | 75 | 76 | T = TypeVar("T") 77 | 78 | 79 | def get_typed_root_config(cfg_dict: DictConfig, cfg_type: Type[T]) -> T: 80 | return get_typed_config( 81 | cfg_type, 82 | cfg_dict, 83 | { 84 | List[DatasetCfg]: separate_multiple_defaults(DatasetCfg), 85 | List[LossCfg]: separate_multiple_defaults(LossCfg), 86 | List[VisualizerCfg]: separate_multiple_defaults(VisualizerCfg), 87 | }, 88 | ) 89 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/utils/logger.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from utils.flow_viz import flow_tensor_to_image 4 | 5 | 6 | class Logger: 7 | def __init__(self, lr_scheduler, 8 | summary_writer, 9 | summary_freq=100, 10 | start_step=0, 11 | ): 12 | self.lr_scheduler = lr_scheduler 13 | self.total_steps = start_step 14 | self.running_loss = {} 15 | self.summary_writer = summary_writer 16 | self.summary_freq = summary_freq 17 | 18 | def print_training_status(self, mode='train'): 19 | 20 | print('step: %06d \t epe: %.3f' % (self.total_steps, self.running_loss['epe'] / self.summary_freq)) 21 | 22 | for k in self.running_loss: 23 | self.summary_writer.add_scalar(mode + '/' + k, 24 | self.running_loss[k] / self.summary_freq, self.total_steps) 25 | self.running_loss[k] = 0.0 26 | 27 | def lr_summary(self): 28 | lr = self.lr_scheduler.get_last_lr()[0] 29 | self.summary_writer.add_scalar('lr', lr, self.total_steps) 30 | 31 | def add_image_summary(self, img1, img2, flow_preds, flow_gt, mode='train', 32 | ): 33 | if self.total_steps % self.summary_freq == 0: 34 | img_concat = torch.cat((img1[0].detach().cpu(), img2[0].detach().cpu()), dim=-1) 35 | img_concat = img_concat.type(torch.uint8) # convert to uint8 to visualize in tensorboard 36 | 37 | flow_pred = flow_tensor_to_image(flow_preds[-1][0]) 38 | forward_flow_gt = flow_tensor_to_image(flow_gt[0]) 39 | flow_concat = torch.cat((torch.from_numpy(flow_pred), 40 | torch.from_numpy(forward_flow_gt)), dim=-1) 41 | 42 | concat = torch.cat((img_concat, flow_concat), dim=-2) 43 | 44 | self.summary_writer.add_image(mode + '/img_pred_gt', concat, self.total_steps) 45 | 46 | def push(self, metrics, mode='train'): 47 | self.total_steps += 1 48 | 49 | self.lr_summary() 50 | 51 | for key in metrics: 52 | if key not in self.running_loss: 53 | self.running_loss[key] = 0.0 54 | 55 | self.running_loss[key] += metrics[key] 56 | 57 | if self.total_steps % self.summary_freq == 0: 58 | self.print_training_status(mode) 59 | self.running_loss = {} 60 | 61 | def write_dict(self, results): 62 | for key in results: 63 | tag = key.split('_')[0] 64 | tag = 
tag + '/' + key 65 | self.summary_writer.add_scalar(tag, results[key], self.total_steps) 66 | 67 | def close(self): 68 | self.summary_writer.close() 69 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/scripts/evaluate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # evaluate GMFlow without refinement 4 | 5 | # evaluate chairs & things trained model on things and sintel (Table 3 of GMFlow paper) 6 | # the output should be: 7 | # Number of validation image pairs: 1024 8 | # Validation Things test set (things_clean) EPE: 3.475 9 | # Validation Things test (things_clean) s0_10: 0.666, s10_40: 1.310, s40+: 8.968 10 | # Number of validation image pairs: 1041 11 | # Validation Sintel (clean) EPE: 1.495, 1px: 0.161, 3px: 0.059, 5px: 0.040 12 | # Validation Sintel (clean) s0_10: 0.457, s10_40: 1.770, s40+: 8.257 13 | # Number of validation image pairs: 1041 14 | # Validation Sintel (final) EPE: 2.955, 1px: 0.209, 3px: 0.098, 5px: 0.071 15 | # Validation Sintel (final) s0_10: 0.725, s10_40: 3.446, s40+: 17.701 16 | 17 | CUDA_VISIBLE_DEVICES=0 python main.py \ 18 | --eval \ 19 | --resume pretrained/gmflow_things-e9887eda.pth \ 20 | --val_dataset things sintel \ 21 | --with_speed_metric 22 | 23 | 24 | 25 | # evaluate GMFlow with refinement 26 | 27 | # evaluate chairs & things trained model on things and sintel (Table 3 of GMFlow paper) 28 | # the output should be: 29 | # Validation Things test set (things_clean) EPE: 2.804 30 | # Validation Things test (things_clean) s0_10: 0.527, s10_40: 1.009, s40+: 7.314 31 | # Number of validation image pairs: 1041 32 | # Validation Sintel (clean) EPE: 1.084, 1px: 0.092, 3px: 0.040, 5px: 0.028 33 | # Validation Sintel (clean) s0_10: 0.303, s10_40: 1.252, s40+: 6.261 34 | # Number of validation image pairs: 1041 35 | # Validation Sintel (final) EPE: 2.475, 1px: 0.147, 3px: 0.077, 5px: 0.058 36 | # Validation Sintel (final) s0_10: 0.511, s10_40: 2.810, s40+: 15.669 37 | 38 | CUDA_VISIBLE_DEVICES=0 python main.py \ 39 | --eval \ 40 | --resume pretrained/gmflow_with_refine_things-36579974.pth \ 41 | --val_dataset things sintel \ 42 | --with_speed_metric \ 43 | --padding_factor 32 \ 44 | --upsample_factor 4 \ 45 | --num_scales 2 \ 46 | --attn_splits_list 2 8 \ 47 | --corr_radius_list -1 4 \ 48 | --prop_radius_list -1 1 49 | 50 | 51 | 52 | # evaluate matched & matched on sintel 53 | 54 | # evaluate GMFlow without refinement 55 | 56 | CUDA_VISIBLE_DEVICES=0 python main.py \ 57 | --eval \ 58 | --evaluate_matched_unmatched \ 59 | --resume pretrained/gmflow_things-e9887eda.pth \ 60 | --val_dataset sintel 61 | 62 | # evaluate GMFlow with refinement 63 | 64 | CUDA_VISIBLE_DEVICES=0 python main.py \ 65 | --eval \ 66 | --evaluate_matched_unmatched \ 67 | --resume pretrained/gmflow_with_refine_things-36579974.pth \ 68 | --val_dataset sintel \ 69 | --with_speed_metric \ 70 | --padding_factor 32 \ 71 | --upsample_factor 4 \ 72 | --num_scales 2 \ 73 | --attn_splits_list 2 8 \ 74 | --corr_radius_list -1 4 \ 75 | --prop_radius_list -1 1 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /egomono4d/misc/depth.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import pdb 4 | import os 5 | import json 6 | import numpy as np 7 | from PIL import Image 8 | from unidepth.models import UniDepthV2 9 | 10 | def 
get_depth_estimator(estimator_name="unidepth_v2_large", cache_dir='.cache', device='cuda'): 11 | # pdb.set_trace() 12 | if estimator_name in ["unidepth_v2_large", "unidepth_v2_small"]: 13 | version = 'v2' 14 | if estimator_name.endswith('large'): 15 | backbone = 'vitl' 16 | elif estimator_name.endswith('small'): 17 | backbone = 'vits' 18 | with open(os.path.join(cache_dir, "unidepth_v2_checkpoints", f"unidepth-{version}-{backbone}14.json")) as f: 19 | config = json.load(f) 20 | model = UniDepthV2(config) 21 | model_dir = os.path.join(cache_dir, "unidepth_v2_checkpoints", f"unidepth-{version}-{backbone}14.bin") 22 | model.load_state_dict(torch.load(model_dir, map_location='cpu')) 23 | model = model.to(device).eval() 24 | return model 25 | else: 26 | raise ValueError(f"Unsupport Depth Estimator: {estimator_name}. Supportion: [depth_anything_v2_large].") 27 | 28 | 29 | 30 | def estimate_relative_depth(pil_image: Image.Image, 31 | model, 32 | estimator_name="unidepth_v2_large"): 33 | if estimator_name in ['unidepth_v2_large', 'unidepth_v2_small']: 34 | rgb = torch.from_numpy(np.array(pil_image)).permute(2, 0, 1) # C, H, W 35 | predictions = model.infer(rgb) 36 | predictions['depth'] = predictions['depth'].cpu().detach().numpy()[0,0] 37 | predictions['intrinsics'] = predictions['intrinsics'].cpu().detach().numpy()[0] 38 | predictions['points'] = predictions['points'].cpu().detach().numpy()[0].transpose(1,2,0) 39 | return predictions 40 | else: 41 | raise ValueError("Unsupport Disparity-Depth Estimator: {estimator_name}. Supportion: [depth_anything_v2_large, depth_anything_v2_large_indoor].") 42 | 43 | 44 | def save_estimate_disparity_png(e_dep, e_dep_fp_img): 45 | # black: 0 <----> white: 1 46 | # we follow that more closer to camera, more closer to white color. 47 | e_dep = (255 * (e_dep - e_dep.min()) / (e_dep.max() - e_dep.min())).astype(np.uint8) 48 | e_dep_img = Image.fromarray(e_dep, mode='L') 49 | e_dep_img.save(e_dep_fp_img) 50 | 51 | 52 | def save_estimate_depth_png(e_dep, e_dep_fp_img): 53 | # black: 1 (deeper) <----> white: 0 (closer) 54 | e_dep = np.log(e_dep) # for depth we first conduct log for it. 55 | dp_norm = (e_dep - e_dep.min()) / (e_dep.max() - e_dep.min()) 56 | e_dep = 255 * (1.0 - dp_norm) 57 | if e_dep.dtype != np.uint8: 58 | e_dep = e_dep.astype(np.uint8) 59 | e_dep_img = Image.fromarray(e_dep, mode='L') 60 | e_dep_img.save(e_dep_fp_img) 61 | -------------------------------------------------------------------------------- /egomono4d/model/extrinsics/extrinsics_regressed.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Literal 3 | 4 | import torch 5 | from einops import rearrange 6 | from jaxtyping import Float 7 | from torch import Tensor, nn 8 | 9 | from ...dataset.types import Batch 10 | from ...flow.flow_predictor import Flows 11 | from ..backbone.backbone import BackboneOutput 12 | from ..projection import get_extrinsics 13 | from .extrinsics import Extrinsics 14 | 15 | 16 | # https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/transforms/rotation_conversions.py 17 | def quaternion_to_matrix( 18 | quaternions: Float[Tensor, "*batch 4"], 19 | eps: float = 1e-8, 20 | ) -> Float[Tensor, "*batch 3 3"]: 21 | # Order changed to match scipy format! 
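    # i.e. quaternions are expected in scalar-last (x, y, z, w) order, matching
    # scipy.spatial.transform.Rotation, whereas the original PyTorch3D helper takes
    # scalar-first (w, x, y, z); hence the (i, j, k, r) unbind order below.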
22 | i, j, k, r = torch.unbind(quaternions, dim=-1) 23 | two_s = 2 / ((quaternions * quaternions).sum(dim=-1) + eps) 24 | 25 | o = torch.stack( 26 | ( 27 | 1 - two_s * (j * j + k * k), 28 | two_s * (i * j - k * r), 29 | two_s * (i * k + j * r), 30 | two_s * (i * j + k * r), 31 | 1 - two_s * (i * i + k * k), 32 | two_s * (j * k - i * r), 33 | two_s * (i * k - j * r), 34 | two_s * (j * k + i * r), 35 | 1 - two_s * (i * i + j * j), 36 | ), 37 | -1, 38 | ) 39 | return rearrange(o, "... (i j) -> ... i j", i=3, j=3) 40 | 41 | 42 | @dataclass 43 | class ExtrinsicsRegressedCfg: 44 | name: Literal["regressed"] 45 | 46 | 47 | class ExtrinsicsRegressed(Extrinsics[ExtrinsicsRegressedCfg]): 48 | def __init__( 49 | self, 50 | cfg: ExtrinsicsRegressedCfg, 51 | num_frames: int, 52 | ) -> None: 53 | super().__init__(cfg, num_frames) 54 | 55 | assert num_frames >= 2 56 | 57 | # Initialize identity translations and rotations. 58 | self.translations = nn.Parameter( 59 | torch.zeros((num_frames - 1, 3), dtype=torch.float32) 60 | ) 61 | rotations = torch.zeros((num_frames - 1, 4), dtype=torch.float32) 62 | rotations[:, -1] = 1 63 | self.rotations = nn.Parameter(rotations) 64 | 65 | def forward( 66 | self, 67 | batch: Batch, 68 | flows: Flows, 69 | backbone_output: BackboneOutput, 70 | surfaces: Float[Tensor, "batch frame height width 3"], 71 | ) -> Float[Tensor, "batch frame 4 4"]: 72 | device = surfaces.device 73 | b, f, _, _, _ = surfaces.shape 74 | 75 | # Regressing the extrinsics only makes sense during overfitting. 76 | assert b == 1 77 | 78 | tf = torch.eye(4, dtype=torch.float32, device=device) 79 | tf = tf.broadcast_to((f - 1, 4, 4)).contiguous() 80 | tf[:, :3, :3] = quaternion_to_matrix(self.rotations) 81 | tf[:, :3, 3] = self.translations 82 | 83 | return get_extrinsics(tf)[None] 84 | -------------------------------------------------------------------------------- /egomono4d/flow/flow_predictor_gmflow.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import urllib.request 4 | from dataclasses import dataclass 5 | from pathlib import Path 6 | from typing import Literal 7 | 8 | import torch 9 | from einops import rearrange 10 | from jaxtyping import Float 11 | from torch import Tensor 12 | 13 | try: 14 | from ..repo.gmflow.gmflow.gmflow import GMFlow 15 | except ImportError: 16 | GMFlow = None 17 | 18 | from .common import split_videos 19 | from .flow_predictor import FlowPredictor 20 | 21 | 22 | @dataclass 23 | class FlowPredictorGMFlowCfg: 24 | name: Literal["gmflow"] 25 | cache_dir: str | None 26 | 27 | 28 | class FlowPredictorGMFlow(FlowPredictor[FlowPredictorGMFlowCfg]): 29 | def __init__(self, cfg: FlowPredictorGMFlowCfg) -> None: 30 | super().__init__(cfg) 31 | 32 | # Warn that GMFlow isn't installed. 33 | if GMFlow is None: 34 | print( 35 | "Warning: GMFlow could not be imported. Did you forget to initialize " 36 | "the git submodules?" 37 | ) 38 | sys.exit(1) 39 | 40 | # Ensure that the checkpoint exists. 41 | checkpoint = "gmflow-scale1-mixdata-train320x576-4c3a6e9a.pth" 42 | checkpoint_path = cfg.cache_dir + "/gmflow_checkpoints/" + checkpoint 43 | if not os.path.exists(checkpoint_path): 44 | os.makedirs(checkpoint_path, exist_ok=True) 45 | print("Downloading GMFlow checkpoint.") 46 | urllib.request.urlretrieve( 47 | f"https://s3.eu-central-1.amazonaws.com/avg-projects/unimatch/pretrained/{checkpoint}", 48 | checkpoint_path, 49 | ) 50 | 51 | # Set up the model. 
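        # These hyperparameters match the single-scale gmflow-scale1 checkpoint downloaded
        # above (no refinement); the refinement variant used in the repo's demo/eval scripts
        # (num_scales=2, attn_splits_list 2 8, padding_factor 32) is not used here.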
52 | self.model = GMFlow( 53 | feature_channels=128, 54 | num_scales=1, 55 | upsample_factor=8, 56 | num_head=1, 57 | attention_type="swin", 58 | ffn_dim_expansion=4, 59 | num_transformer_layers=6, 60 | ) 61 | 62 | # Load the pre-trained checkpoint. 63 | checkpoint = torch.load(checkpoint_path) 64 | weights = checkpoint["model"] if "model" in checkpoint else checkpoint 65 | self.model.load_state_dict(weights, strict=False) 66 | 67 | def forward( 68 | self, 69 | videos: Float[Tensor, "batch frame 3 height width"], 70 | ) -> Float[Tensor, "batch frame-1 height width 2"]: 71 | source, target, b, f = split_videos(videos) 72 | 73 | result = self.model( 74 | source * 255, 75 | target * 255, 76 | attn_splits_list=[2], 77 | corr_radius_list=[-1], 78 | prop_radius_list=[-1], 79 | pred_bidir_flow=False, 80 | ) 81 | flow = result["flow_preds"][-1] 82 | 83 | # Normalize the optical flow. 84 | _, _, h, w = source.shape 85 | wh = torch.tensor((w-1, h-1), dtype=torch.float32, device=flow.device) 86 | return rearrange(flow, "(b f) xy h w -> b f h w xy", b=b, f=f - 1) / wh 87 | -------------------------------------------------------------------------------- /egomono4d/dataset/data_module_pretrain.py: -------------------------------------------------------------------------------- 1 | import random 2 | import os 3 | from typing import Callable, Optional, List 4 | 5 | import numpy as np 6 | import torch 7 | from torch import Generator 8 | from torch.utils.data import DataLoader, Dataset, IterableDataset, DistributedSampler 9 | 10 | from . import DatasetCfg, get_dataset 11 | from .types import Stage 12 | from lightning.pytorch import LightningDataModule as LightningDataModule 13 | from .data_module_pretrain_cfg import DataLoaderStageCfg, DataModulePretrainCfg 14 | 15 | DatasetShim = Callable[[Dataset, Stage], Dataset] 16 | 17 | 18 | def worker_init_fn(worker_id: int) -> None: 19 | random.seed(int(torch.utils.data.get_worker_info().seed) % (2**32 - 1)) 20 | np.random.seed(int(torch.utils.data.get_worker_info().seed) % (2**32 - 1)) 21 | 22 | 23 | class DataModulePretrain(LightningDataModule): 24 | def __init__( 25 | self, 26 | dataset_cfgs: List[DatasetCfg], 27 | data_module_cfg: DataModulePretrainCfg, 28 | global_rank: int, 29 | world_size: int, 30 | data_ratio: Optional[float]=1.0 31 | ) -> None: 32 | super().__init__() 33 | self.dataset_cfgs = dataset_cfgs 34 | self.data_module_cfg = data_module_cfg 35 | self.global_rank = global_rank 36 | self.world_size = world_size 37 | self.data_ratio = data_ratio 38 | 39 | def get_persistent(self, loader_cfg: DataLoaderStageCfg): 40 | return None if loader_cfg.num_workers == 0 else loader_cfg.persistent_workers 41 | 42 | def get_generator(self, loader_cfg: DataLoaderStageCfg): 43 | if loader_cfg.seed is None: 44 | return None 45 | generator = Generator() 46 | generator.manual_seed(loader_cfg.seed + self.global_rank) 47 | return generator 48 | 49 | def train_dataloader(self): 50 | dataset = get_dataset(self.dataset_cfgs, "train", global_rank=self.global_rank, world_size=self.world_size, data_ratio=self.data_ratio) 51 | print(f"train_batch_size = {self.data_module_cfg.train.batch_size}") 52 | return DataLoader( 53 | dataset, 54 | self.data_module_cfg.train.batch_size, 55 | shuffle=not isinstance(dataset, IterableDataset), 56 | num_workers=self.data_module_cfg.train.num_workers, 57 | generator=self.get_generator(self.data_module_cfg.train), 58 | worker_init_fn=worker_init_fn, 59 | persistent_workers=self.get_persistent(self.data_module_cfg.train), 60 | ) 61 | 62 | def 
val_dataloader(self): 63 | dataset = get_dataset(self.dataset_cfgs, "val", global_rank=self.global_rank, world_size=self.world_size, data_ratio=self.data_ratio) 64 | print(f"validation_batch_size = {self.data_module_cfg.val.batch_size}") 65 | return DataLoader( 66 | dataset, 67 | self.data_module_cfg.val.batch_size, 68 | num_workers=self.data_module_cfg.val.num_workers, 69 | generator=self.get_generator(self.data_module_cfg.val), 70 | worker_init_fn=worker_init_fn, 71 | persistent_workers=self.get_persistent(self.data_module_cfg.val), 72 | ) 73 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/gmflow/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .position import PositionEmbeddingSine 3 | 4 | 5 | def split_feature(feature, 6 | num_splits=2, 7 | channel_last=False, 8 | ): 9 | if channel_last: # [B, H, W, C] 10 | b, h, w, c = feature.size() 11 | assert h % num_splits == 0 and w % num_splits == 0 12 | 13 | b_new = b * num_splits * num_splits 14 | h_new = h // num_splits 15 | w_new = w // num_splits 16 | 17 | feature = feature.view(b, num_splits, h // num_splits, num_splits, w // num_splits, c 18 | ).permute(0, 1, 3, 2, 4, 5).reshape(b_new, h_new, w_new, c) # [B*K*K, H/K, W/K, C] 19 | else: # [B, C, H, W] 20 | b, c, h, w = feature.size() 21 | assert h % num_splits == 0 and w % num_splits == 0 22 | 23 | b_new = b * num_splits * num_splits 24 | h_new = h // num_splits 25 | w_new = w // num_splits 26 | 27 | feature = feature.view(b, c, num_splits, h // num_splits, num_splits, w // num_splits 28 | ).permute(0, 2, 4, 1, 3, 5).reshape(b_new, c, h_new, w_new) # [B*K*K, C, H/K, W/K] 29 | 30 | return feature 31 | 32 | 33 | def merge_splits(splits, 34 | num_splits=2, 35 | channel_last=False, 36 | ): 37 | if channel_last: # [B*K*K, H/K, W/K, C] 38 | b, h, w, c = splits.size() 39 | new_b = b // num_splits // num_splits 40 | 41 | splits = splits.view(new_b, num_splits, num_splits, h, w, c) 42 | merge = splits.permute(0, 1, 3, 2, 4, 5).contiguous().view( 43 | new_b, num_splits * h, num_splits * w, c) # [B, H, W, C] 44 | else: # [B*K*K, C, H/K, W/K] 45 | b, c, h, w = splits.size() 46 | new_b = b // num_splits // num_splits 47 | 48 | splits = splits.view(new_b, num_splits, num_splits, c, h, w) 49 | merge = splits.permute(0, 3, 1, 4, 2, 5).contiguous().view( 50 | new_b, c, num_splits * h, num_splits * w) # [B, C, H, W] 51 | 52 | return merge 53 | 54 | 55 | def normalize_img(img0, img1): 56 | # loaded images are in [0, 255] 57 | # normalize by ImageNet mean and std 58 | mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(img1.device) 59 | std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(img1.device) 60 | img0 = (img0 / 255. - mean) / std 61 | img1 = (img1 / 255. 
- mean) / std 62 | 63 | return img0, img1 64 | 65 | 66 | def feature_add_position(feature0, feature1, attn_splits, feature_channels): 67 | pos_enc = PositionEmbeddingSine(num_pos_feats=feature_channels // 2) 68 | 69 | if attn_splits > 1: # add position in splited window 70 | feature0_splits = split_feature(feature0, num_splits=attn_splits) 71 | feature1_splits = split_feature(feature1, num_splits=attn_splits) 72 | 73 | position = pos_enc(feature0_splits) 74 | 75 | feature0_splits = feature0_splits + position 76 | feature1_splits = feature1_splits + position 77 | 78 | feature0 = merge_splits(feature0_splits, num_splits=attn_splits) 79 | feature1 = merge_splits(feature1_splits, num_splits=attn_splits) 80 | else: 81 | position = pos_enc(feature0) 82 | 83 | feature0 = feature0 + position 84 | feature1 = feature1 + position 85 | 86 | return feature0, feature1 87 | -------------------------------------------------------------------------------- /egomono4d/visualization/drawing/lines.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Optional 2 | 3 | import torch 4 | from einops import einsum, repeat 5 | from jaxtyping import Float 6 | from torch import Tensor 7 | 8 | from .coordinate_conversion import generate_conversions 9 | from .rendering import render_over_image 10 | from .types import Pair, Scalar, Vector, sanitize_scalar, sanitize_vector 11 | 12 | 13 | def draw_lines( 14 | image: Float[Tensor, "3 height width"] | Float[Tensor, "4 height width"], 15 | start: Vector, 16 | end: Vector, 17 | color: Vector, 18 | width: Scalar, 19 | cap: Literal["butt", "round", "square"] = "round", 20 | num_msaa_passes: int = 1, 21 | x_range: Optional[Pair] = None, 22 | y_range: Optional[Pair] = None, 23 | ) -> Float[Tensor, "3 height width"] | Float[Tensor, "4 height width"]: 24 | device = image.device 25 | start = sanitize_vector(start, 2, device) 26 | end = sanitize_vector(end, 2, device) 27 | color = sanitize_vector(color, 3, device) 28 | width = sanitize_scalar(width, device) 29 | (num_lines,) = torch.broadcast_shapes( 30 | start.shape[0], 31 | end.shape[0], 32 | color.shape[0], 33 | width.shape, 34 | ) 35 | 36 | # Convert world-space points to pixel space. 37 | _, h, w = image.shape 38 | world_to_pixel, _ = generate_conversions((h, w), device, x_range, y_range) 39 | start = world_to_pixel(start) 40 | end = world_to_pixel(end) 41 | 42 | def color_function( 43 | xy: Float[Tensor, "point 2"], 44 | ) -> Float[Tensor, "point 4"]: 45 | # Define a vector between the start and end points. 46 | delta = end - start 47 | delta_norm = delta.norm(dim=-1, keepdim=True) 48 | u_delta = delta / delta_norm 49 | 50 | # Define a vector between each sample and the start point. 51 | indicator = xy - start[:, None] 52 | 53 | # Determine whether each sample is inside the line in the parallel direction. 54 | extra = 0.5 * width[:, None] if cap == "square" else 0 55 | parallel = einsum(u_delta, indicator, "l xy, l s xy -> l s") 56 | parallel_inside_line = (parallel <= delta_norm + extra) & (parallel > -extra) 57 | 58 | # Determine whether each sample is inside the line perpendicularly. 59 | perpendicular = indicator - parallel[..., None] * u_delta[:, None] 60 | perpendicular_inside_line = perpendicular.norm(dim=-1) < 0.5 * width[:, None] 61 | 62 | inside_line = parallel_inside_line & perpendicular_inside_line 63 | 64 | # Compute round caps. 
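# For "round" caps, a sample also counts as inside if it lies within half the line
# width of either endpoint (a half-disc glued onto each end); "butt" caps add nothing,
# and "square" caps were already handled through `extra` above.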
65 | if cap == "round": 66 | near_start = indicator.norm(dim=-1) < 0.5 * width[:, None] 67 | inside_line |= near_start 68 | end_indicator = indicator = xy - end[:, None] 69 | near_end = end_indicator.norm(dim=-1) < 0.5 * width[:, None] 70 | inside_line |= near_end 71 | 72 | # Determine the sample's color. 73 | selectable_color = color.broadcast_to((num_lines, 3)) 74 | arrangement = inside_line * torch.arange(num_lines, device=device)[:, None] 75 | top_color = selectable_color.gather( 76 | dim=0, 77 | index=repeat(arrangement.argmax(dim=0), "s -> s c", c=3), 78 | ) 79 | rgba = torch.cat((top_color, inside_line.any(dim=0).float()[:, None]), dim=-1) 80 | 81 | return rgba 82 | 83 | return render_over_image(image, color_function, device, num_passes=num_msaa_passes) 84 | -------------------------------------------------------------------------------- /egomono4d/eval/eval_pointcloud.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import open3d as o3d 4 | import pdb 5 | import copy 6 | import torch.nn.functional as F 7 | from einops import einsum, rearrange 8 | from ..model.procrustes import align_scaled_rigid 9 | import kaolin as kal 10 | 11 | FLY_THRESHOLD = 0.05 12 | 13 | def eval_pointcloud_conductor(pred_pcd, gt_pcd, gt_flys, rgbs, commit=""): # (b, f, h, w, 3) * 2, (b, f, h, w) 14 | """ An implementation based on Kaolin library. """ 15 | 16 | b, f, h, w, _ = pred_pcd.shape 17 | 18 | pred_pcd_align = pred_pcd.reshape(b, f*h*w, 3) 19 | gt_pcd_align = gt_pcd.reshape(b, f*h*w, 3) 20 | gt_flys_align = gt_flys.reshape(b, f*h*w) 21 | 22 | delta_ext_scale, scale = align_scaled_rigid(pred_pcd_align, gt_pcd_align, gt_flys_align) 23 | pred_pcd_align = torch.matmul(delta_ext_scale[:, :3,:3], pred_pcd_align.permute(0,2,1)).permute(0,2,1) + delta_ext_scale[:, :3, -1][:, None] 24 | 25 | pred_pcd_align = pred_pcd_align.reshape(b, f, h*w, 3) 26 | gt_pcd_align = gt_pcd_align.reshape(b, f, h*w, 3) 27 | gt_flys_align = gt_flys_align.reshape(b, f, h*w) 28 | 29 | ######################################## VIS ############################################### 30 | # vis_flys = gt_flys_align.reshape(b, -1) 31 | # rgbss = rgbs.permute(0,1,3,4,2) 32 | # rgbss = rgbss.reshape(b, -1, 3)[vis_flys == 1] 33 | # ppa = pred_pcd_align.reshape(b, -1, 3)[vis_flys == 1].reshape(-1, 3) 34 | # pcd = o3d.geometry.PointCloud() 35 | # pcd.points = o3d.utility.Vector3dVector(np.array(ppa.cpu().detach().reshape(-1, 3))) 36 | # pcd.colors = o3d.utility.Vector3dVector(np.array(rgbss.cpu().detach().reshape(-1, 3))) 37 | # o3d.io.write_point_cloud(f"pcd_pred_{commit}"+".ply", pcd) 38 | 39 | # pcd = o3d.geometry.PointCloud() 40 | # gpa = gt_pcd_align.reshape(b, -1, 3)[vis_flys == 1].reshape(-1, 3) 41 | # pcd.points = o3d.utility.Vector3dVector(np.array(gpa.cpu().detach().reshape(-1, 3))) 42 | # pcd.colors = o3d.utility.Vector3dVector(np.array(rgbss.cpu().detach().reshape(-1, 3))) 43 | # o3d.io.write_point_cloud(f"pcd_gt_{commit}"+".ply", pcd) 44 | ######################################## VIS ############################################### 45 | 46 | cds, f001, f0025, f005, f01 = [], [], [], [], [] 47 | n_f = pred_pcd_align.shape[0] 48 | for i in range(n_f): 49 | gt_fly_f = gt_flys_align[:, i] 50 | pred_pcd_f = pred_pcd_align[:, i][gt_fly_f == 1][None] 51 | gt_pcd_f = gt_pcd_align[:, i][gt_fly_f == 1][None] 52 | cd = kal.metrics.pointcloud.chamfer_distance(pred_pcd_f, gt_pcd_f) 53 | cds.append(cd) 54 | f001.append(kal.metrics.pointcloud.f_score(pred_pcd_f, gt_pcd_f, 
radius=0.01)) 55 | f0025.append(kal.metrics.pointcloud.f_score(pred_pcd_f, gt_pcd_f, radius=0.025)) 56 | f005.append(kal.metrics.pointcloud.f_score(pred_pcd_f, gt_pcd_f, radius=0.05)) 57 | f01.append(kal.metrics.pointcloud.f_score(pred_pcd_f, gt_pcd_f, radius=0.1)) 58 | 59 | cd = sum(cds) / (b*n_f) 60 | f_score_001 = sum(f001) / (b*n_f) 61 | f_score_0025 = sum(f0025) / (b*n_f) 62 | f_score_005 = sum(f005) / (b*n_f) 63 | f_score_01 = sum(f01) / (b*n_f) 64 | 65 | return { 66 | "PCD_ChamferDistance(mm)": 1000.0 * cd.item(), 67 | "PCD_FScore_[.01]": 100.0*f_score_001.item(), 68 | "PCD_FScore_[.025]": 100.0*f_score_0025.item(), 69 | "PCD_FScore_[.05]": 100.0*f_score_005.item(), 70 | "PCD_FScore_[.1]": 100.0*f_score_01.item(), 71 | } 72 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/gmflow/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def coords_grid(b, h, w, homogeneous=False, device=None): 6 | y, x = torch.meshgrid(torch.arange(h), torch.arange(w)) # [H, W] 7 | 8 | stacks = [x, y] 9 | 10 | if homogeneous: 11 | ones = torch.ones_like(x) # [H, W] 12 | stacks.append(ones) 13 | 14 | grid = torch.stack(stacks, dim=0).float() # [2, H, W] or [3, H, W] 15 | 16 | grid = grid[None].repeat(b, 1, 1, 1) # [B, 2, H, W] or [B, 3, H, W] 17 | 18 | if device is not None: 19 | grid = grid.to(device) 20 | 21 | return grid 22 | 23 | 24 | def generate_window_grid(h_min, h_max, w_min, w_max, len_h, len_w, device=None): 25 | assert device is not None 26 | 27 | x, y = torch.meshgrid([torch.linspace(w_min, w_max, len_w, device=device), 28 | torch.linspace(h_min, h_max, len_h, device=device)], 29 | ) 30 | grid = torch.stack((x, y), -1).transpose(0, 1).float() # [H, W, 2] 31 | 32 | return grid 33 | 34 | 35 | def normalize_coords(coords, h, w): 36 | # coords: [B, H, W, 2] 37 | c = torch.Tensor([(w - 1) / 2., (h - 1) / 2.]).float().to(coords.device) 38 | return (coords - c) / c # [-1, 1] 39 | 40 | 41 | def bilinear_sample(img, sample_coords, mode='bilinear', padding_mode='zeros', return_mask=False): 42 | # img: [B, C, H, W] 43 | # sample_coords: [B, 2, H, W] in image scale 44 | if sample_coords.size(1) != 2: # [B, H, W, 2] 45 | sample_coords = sample_coords.permute(0, 3, 1, 2) 46 | 47 | b, _, h, w = sample_coords.shape 48 | 49 | # Normalize to [-1, 1] 50 | x_grid = 2 * sample_coords[:, 0] / (w - 1) - 1 51 | y_grid = 2 * sample_coords[:, 1] / (h - 1) - 1 52 | 53 | grid = torch.stack([x_grid, y_grid], dim=-1) # [B, H, W, 2] 54 | 55 | img = F.grid_sample(img, grid, mode=mode, padding_mode=padding_mode, align_corners=True) 56 | 57 | if return_mask: 58 | mask = (x_grid >= -1) & (y_grid >= -1) & (x_grid <= 1) & (y_grid <= 1) # [B, H, W] 59 | 60 | return img, mask 61 | 62 | return img 63 | 64 | 65 | def flow_warp(feature, flow, mask=False, padding_mode='zeros'): 66 | b, c, h, w = feature.size() 67 | assert flow.size(1) == 2 68 | 69 | grid = coords_grid(b, h, w).to(flow.device) + flow # [B, 2, H, W] 70 | 71 | return bilinear_sample(feature, grid, padding_mode=padding_mode, 72 | return_mask=mask) 73 | 74 | 75 | def forward_backward_consistency_check(fwd_flow, bwd_flow, 76 | alpha=0.01, 77 | beta=0.5 78 | ): 79 | # fwd_flow, bwd_flow: [B, 2, H, W] 80 | # alpha and beta values are following UnFlow (https://arxiv.org/abs/1711.07837) 81 | assert fwd_flow.dim() == 4 and bwd_flow.dim() == 4 82 | assert fwd_flow.size(1) == 2 and bwd_flow.size(1) == 2 83 | flow_mag = 
torch.norm(fwd_flow, dim=1) + torch.norm(bwd_flow, dim=1) # [B, H, W] 84 | 85 | warped_bwd_flow = flow_warp(bwd_flow, fwd_flow) # [B, 2, H, W] 86 | warped_fwd_flow = flow_warp(fwd_flow, bwd_flow) # [B, 2, H, W] 87 | 88 | diff_fwd = torch.norm(fwd_flow + warped_bwd_flow, dim=1) # [B, H, W] 89 | diff_bwd = torch.norm(bwd_flow + warped_fwd_flow, dim=1) 90 | 91 | threshold = alpha * flow_mag + beta 92 | 93 | fwd_occ = (diff_fwd > threshold).float() # [B, H, W] 94 | bwd_occ = (diff_bwd > threshold).float() 95 | 96 | return fwd_occ, bwd_occ 97 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/gmflow/trident_conv.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # https://github.com/facebookresearch/detectron2/blob/main/projects/TridentNet/tridentnet/trident_conv.py 3 | 4 | import torch 5 | from torch import nn 6 | from torch.nn import functional as F 7 | from torch.nn.modules.utils import _pair 8 | 9 | 10 | class MultiScaleTridentConv(nn.Module): 11 | def __init__( 12 | self, 13 | in_channels, 14 | out_channels, 15 | kernel_size, 16 | stride=1, 17 | strides=1, 18 | paddings=0, 19 | dilations=1, 20 | dilation=1, 21 | groups=1, 22 | num_branch=1, 23 | test_branch_idx=-1, 24 | bias=False, 25 | norm=None, 26 | activation=None, 27 | ): 28 | super(MultiScaleTridentConv, self).__init__() 29 | self.in_channels = in_channels 30 | self.out_channels = out_channels 31 | self.kernel_size = _pair(kernel_size) 32 | self.num_branch = num_branch 33 | self.stride = _pair(stride) 34 | self.groups = groups 35 | self.with_bias = bias 36 | self.dilation = dilation 37 | if isinstance(paddings, int): 38 | paddings = [paddings] * self.num_branch 39 | if isinstance(dilations, int): 40 | dilations = [dilations] * self.num_branch 41 | if isinstance(strides, int): 42 | strides = [strides] * self.num_branch 43 | self.paddings = [_pair(padding) for padding in paddings] 44 | self.dilations = [_pair(dilation) for dilation in dilations] 45 | self.strides = [_pair(stride) for stride in strides] 46 | self.test_branch_idx = test_branch_idx 47 | self.norm = norm 48 | self.activation = activation 49 | 50 | assert len({self.num_branch, len(self.paddings), len(self.strides)}) == 1 51 | 52 | self.weight = nn.Parameter( 53 | torch.Tensor(out_channels, in_channels // groups, *self.kernel_size) 54 | ) 55 | if bias: 56 | self.bias = nn.Parameter(torch.Tensor(out_channels)) 57 | else: 58 | self.bias = None 59 | 60 | nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") 61 | if self.bias is not None: 62 | nn.init.constant_(self.bias, 0) 63 | 64 | def forward(self, inputs): 65 | num_branch = self.num_branch if self.training or self.test_branch_idx == -1 else 1 66 | assert len(inputs) == num_branch 67 | 68 | if self.training or self.test_branch_idx == -1: 69 | outputs = [ 70 | F.conv2d(input, self.weight, self.bias, stride, padding, self.dilation, self.groups) 71 | for input, stride, padding in zip(inputs, self.strides, self.paddings) 72 | ] 73 | else: 74 | outputs = [ 75 | F.conv2d( 76 | inputs[0], 77 | self.weight, 78 | self.bias, 79 | self.strides[self.test_branch_idx] if self.test_branch_idx == -1 else self.strides[-1], 80 | self.paddings[self.test_branch_idx] if self.test_branch_idx == -1 else self.paddings[-1], 81 | self.dilation, 82 | self.groups, 83 | ) 84 | ] 85 | 86 | if self.norm is not None: 87 | outputs = [self.norm(x) for x in outputs] 88 | if self.activation is not None: 89 | 
outputs = [self.activation(x) for x in outputs] 90 | return outputs 91 | -------------------------------------------------------------------------------- /egomono4d/misc/fly.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import torch 4 | 5 | 6 | def detect_flying_pixels(depth_map, threshold=10): 7 | # # depth_map: (h, w) 8 | 9 | depth_dx, depth_dy = np.gradient(depth_map) 10 | depth_grad = np.sqrt(depth_dx**2 + depth_dy**2) 11 | flying_pixels = depth_grad > threshold 12 | 13 | return flying_pixels 14 | 15 | 16 | def detect_sequence_flying_pixels(depth_sequence, threshold=10): 17 | """ 18 | Process a sequence of depth maps to detect flying pixels. 19 | 20 | Parameters: 21 | depth_sequence (numpy.ndarray): The input depth sequence with shape (f, h, w). 22 | threshold (int): The threshold for detecting depth discontinuities. 23 | 24 | Returns: 25 | numpy.ndarray: A binary sequence where flying pixels are marked as 1. 26 | """ 27 | f, h, w = depth_sequence.shape 28 | flying_pixels_sequence = np.zeros((f, h, w), dtype=np.uint8) 29 | 30 | for i in range(f): 31 | flying_pixels_sequence[i] = detect_flying_pixels(depth_sequence[i], threshold) 32 | 33 | return flying_pixels_sequence 34 | 35 | 36 | def calculate_edge_scale_torch(surfaces, fly_masks): 37 | """ 38 | Get the scale of the point cloud defined with mean edge distance. 39 | scale = \sigma_{(i,j) in edges} ||p_i - p_j|| 40 | 41 | Inputs: 42 | surfaces: torch.Tensor[batch*, h, w, 3] 43 | fly_masks: torch.Tensor[batch*, h, w] 44 | 45 | Return: 46 | scale: torch.Tensor[batch*] 47 | """ 48 | 49 | dist_right = torch.norm(surfaces[..., :, 1:, :] - surfaces[..., :, :-1, :], dim=-1) 50 | dist_down = torch.norm(surfaces[..., 1:, :, :] - surfaces[..., :-1, :, :], dim=-1) 51 | mask_right = fly_masks[..., :, 1:] * fly_masks[..., :, :-1] 52 | mask_down = fly_masks[..., 1:, :] * fly_masks[..., :-1, :] 53 | 54 | scale_right = (dist_right * mask_right).sum(dim=[-1,-2]) / mask_right.sum(dim=[-1,-2]) 55 | scale_left = (dist_down * mask_down).sum(dim=[-1,-2]) / mask_down.sum(dim=[-1,-2]) 56 | 57 | scale_edge = (scale_right + scale_left) * 0.5 58 | return scale_edge 59 | 60 | 61 | def calculate_scale_pts(pts): # (n, 3) 62 | 63 | n, _ = pts.shape 64 | surfaces_flat = pts[None] 65 | 66 | centroids = torch.mean(surfaces_flat, dim=1, keepdim=True) 67 | centered_points = surfaces_flat - centroids 68 | 69 | cov_matrices = torch.bmm(centered_points.transpose(1, 2), centered_points) / n 70 | eigenvalues, _ = torch.linalg.eigh(cov_matrices) # (batch*, 3) 71 | scale = torch.sqrt(eigenvalues[:, -1]) # pick the largest PCA item 72 | 73 | return scale[0] 74 | 75 | 76 | def calculate_scale_torch(surfaces): 77 | """ 78 | Get the scale of the point cloud defined with PCA analysis. 
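Concretely, scale = sqrt(lambda_max), the square root of the largest eigenvalue of the
covariance matrix of the flattened surface points, rather than the mean edge length used
by calculate_edge_scale_torch above, i.e.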
79 | scale = \sigma_{(i,j) in edges} ||p_i - p_j|| 80 | 81 | Inputs: 82 | surfaces: torch.Tensor[batch*, h, w, 3] 83 | fly_masks: torch.Tensor[batch*, h, w] 84 | 85 | Return: 86 | scale: torch.Tensor[batch*] 87 | """ 88 | 89 | batch_shape = surfaces.shape[:-3] 90 | h, w = surfaces.shape[-3:-1] 91 | surfaces_flat = surfaces.view(-1, h * w, 3) # (batch*, h*w, 3) 92 | 93 | centroids = torch.mean(surfaces_flat, dim=1, keepdim=True) 94 | centered_points = surfaces_flat - centroids 95 | 96 | cov_matrices = torch.bmm(centered_points.transpose(1, 2), centered_points) / (h * w) 97 | eigenvalues, _ = torch.linalg.eigh(cov_matrices) # (batch*, 3) 98 | scale = torch.sqrt(eigenvalues[:, -1]) # pick the largest PCA item 99 | scale = scale.reshape(batch_shape) 100 | 101 | return scale 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/gmflow/matching.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from .geometry import coords_grid, generate_window_grid, normalize_coords 5 | 6 | 7 | def global_correlation_softmax(feature0, feature1, 8 | pred_bidir_flow=False, 9 | ): 10 | # global correlation 11 | b, c, h, w = feature0.shape 12 | feature0 = feature0.view(b, c, -1).permute(0, 2, 1) # [B, H*W, C] 13 | feature1 = feature1.view(b, c, -1) # [B, C, H*W] 14 | 15 | correlation = torch.matmul(feature0, feature1).view(b, h, w, h, w) / (c ** 0.5) # [B, H, W, H, W] 16 | 17 | # flow from softmax 18 | init_grid = coords_grid(b, h, w).to(correlation.device) # [B, 2, H, W] 19 | grid = init_grid.view(b, 2, -1).permute(0, 2, 1) # [B, H*W, 2] 20 | 21 | correlation = correlation.view(b, h * w, h * w) # [B, H*W, H*W] 22 | 23 | if pred_bidir_flow: 24 | correlation = torch.cat((correlation, correlation.permute(0, 2, 1)), dim=0) # [2*B, H*W, H*W] 25 | init_grid = init_grid.repeat(2, 1, 1, 1) # [2*B, 2, H, W] 26 | grid = grid.repeat(2, 1, 1) # [2*B, H*W, 2] 27 | b = b * 2 28 | 29 | prob = F.softmax(correlation, dim=-1) # [B, H*W, H*W] 30 | 31 | correspondence = torch.matmul(prob, grid).view(b, h, w, 2).permute(0, 3, 1, 2) # [B, 2, H, W] 32 | 33 | # when predicting bidirectional flow, flow is the concatenation of forward flow and backward flow 34 | flow = correspondence - init_grid 35 | 36 | return flow, prob 37 | 38 | 39 | def local_correlation_softmax(feature0, feature1, local_radius, 40 | padding_mode='zeros', 41 | ): 42 | b, c, h, w = feature0.size() 43 | coords_init = coords_grid(b, h, w).to(feature0.device) # [B, 2, H, W] 44 | coords = coords_init.view(b, 2, -1).permute(0, 2, 1) # [B, H*W, 2] 45 | 46 | local_h = 2 * local_radius + 1 47 | local_w = 2 * local_radius + 1 48 | 49 | window_grid = generate_window_grid(-local_radius, local_radius, 50 | -local_radius, local_radius, 51 | local_h, local_w, device=feature0.device) # [2R+1, 2R+1, 2] 52 | window_grid = window_grid.reshape(-1, 2).repeat(b, 1, 1, 1) # [B, 1, (2R+1)^2, 2] 53 | sample_coords = coords.unsqueeze(-2) + window_grid # [B, H*W, (2R+1)^2, 2] 54 | 55 | sample_coords_softmax = sample_coords 56 | 57 | # exclude coords that are out of image space 58 | valid_x = (sample_coords[:, :, :, 0] >= 0) & (sample_coords[:, :, :, 0] < w) # [B, H*W, (2R+1)^2] 59 | valid_y = (sample_coords[:, :, :, 1] >= 0) & (sample_coords[:, :, :, 1] < h) # [B, H*W, (2R+1)^2] 60 | 61 | valid = valid_x & valid_y # [B, H*W, (2R+1)^2], used to mask out invalid values when softmax 62 | 63 | # normalize coordinates to [-1, 1] 64 | 
sample_coords_norm = normalize_coords(sample_coords, h, w) # [-1, 1] 65 | window_feature = F.grid_sample(feature1, sample_coords_norm, 66 | padding_mode=padding_mode, align_corners=True 67 | ).permute(0, 2, 1, 3) # [B, H*W, C, (2R+1)^2] 68 | feature0_view = feature0.permute(0, 2, 3, 1).view(b, h * w, 1, c) # [B, H*W, 1, C] 69 | 70 | corr = torch.matmul(feature0_view, window_feature).view(b, h * w, -1) / (c ** 0.5) # [B, H*W, (2R+1)^2] 71 | 72 | # mask invalid locations 73 | corr[~valid] = -1e9 74 | 75 | prob = F.softmax(corr, -1) # [B, H*W, (2R+1)^2] 76 | 77 | correspondence = torch.matmul(prob.unsqueeze(-2), sample_coords_softmax).squeeze(-2).view( 78 | b, h, w, 2).permute(0, 3, 1, 2) # [B, 2, H, W] 79 | 80 | flow = correspondence - coords_init 81 | match_prob = prob 82 | 83 | return flow, match_prob 84 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # https://github.com/open-mmlab/mmcv/blob/7540cf73ac7e5d1e14d0ffbd9b6759e83929ecfc/mmcv/runner/dist_utils.py 3 | 4 | import os 5 | import subprocess 6 | 7 | import torch 8 | import torch.multiprocessing as mp 9 | from torch import distributed as dist 10 | 11 | 12 | def init_dist(launcher, backend='nccl', **kwargs): 13 | if mp.get_start_method(allow_none=True) is None: 14 | mp.set_start_method('spawn') 15 | if launcher == 'pytorch': 16 | _init_dist_pytorch(backend, **kwargs) 17 | elif launcher == 'mpi': 18 | _init_dist_mpi(backend, **kwargs) 19 | elif launcher == 'slurm': 20 | _init_dist_slurm(backend, **kwargs) 21 | else: 22 | raise ValueError(f'Invalid launcher type: {launcher}') 23 | 24 | 25 | def _init_dist_pytorch(backend, **kwargs): 26 | # TODO: use local_rank instead of rank % num_gpus 27 | rank = int(os.environ['RANK']) 28 | num_gpus = torch.cuda.device_count() 29 | torch.cuda.set_device(rank % num_gpus) 30 | dist.init_process_group(backend=backend, **kwargs) 31 | 32 | 33 | def _init_dist_mpi(backend, **kwargs): 34 | rank = int(os.environ['OMPI_COMM_WORLD_RANK']) 35 | num_gpus = torch.cuda.device_count() 36 | torch.cuda.set_device(rank % num_gpus) 37 | dist.init_process_group(backend=backend, **kwargs) 38 | 39 | 40 | def _init_dist_slurm(backend, port=None): 41 | """Initialize slurm distributed training environment. 42 | If argument ``port`` is not specified, then the master port will be system 43 | environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system 44 | environment variable, then a default port ``29500`` will be used. 45 | Args: 46 | backend (str): Backend of torch.distributed. 47 | port (int, optional): Master port. Defaults to None. 
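Example (hypothetical launch; the exact entry point and flags depend on the training
script in use):
    srun -N 2 --ntasks-per-node=8 python main.py --launcher slurm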
48 | """ 49 | proc_id = int(os.environ['SLURM_PROCID']) 50 | ntasks = int(os.environ['SLURM_NTASKS']) 51 | node_list = os.environ['SLURM_NODELIST'] 52 | num_gpus = torch.cuda.device_count() 53 | torch.cuda.set_device(proc_id % num_gpus) 54 | addr = subprocess.getoutput( 55 | f'scontrol show hostname {node_list} | head -n1') 56 | # specify master port 57 | if port is not None: 58 | os.environ['MASTER_PORT'] = str(port) 59 | elif 'MASTER_PORT' in os.environ: 60 | pass # use MASTER_PORT in the environment variable 61 | else: 62 | # 29500 is torch.distributed default port 63 | os.environ['MASTER_PORT'] = '29500' 64 | # use MASTER_ADDR in the environment variable if it already exists 65 | if 'MASTER_ADDR' not in os.environ: 66 | os.environ['MASTER_ADDR'] = addr 67 | os.environ['WORLD_SIZE'] = str(ntasks) 68 | os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) 69 | os.environ['RANK'] = str(proc_id) 70 | dist.init_process_group(backend=backend) 71 | 72 | 73 | def get_dist_info(): 74 | if dist.is_available(): 75 | initialized = dist.is_initialized() 76 | else: 77 | initialized = False 78 | if initialized: 79 | rank = dist.get_rank() 80 | world_size = dist.get_world_size() 81 | else: 82 | rank = 0 83 | world_size = 1 84 | return rank, world_size 85 | 86 | 87 | def setup_for_distributed(is_master): 88 | """ 89 | This function disables printing when not in master process 90 | """ 91 | import builtins as __builtin__ 92 | builtin_print = __builtin__.print 93 | 94 | def print(*args, **kwargs): 95 | force = kwargs.pop('force', False) 96 | if is_master or force: 97 | builtin_print(*args, **kwargs) 98 | 99 | __builtin__.print = print 100 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/scripts/train_gmflow.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # GMFlow without refinement 4 | 5 | # number of gpus for training, please set according to your hardware 6 | # by default use all gpus on a machine 7 | # can be trained on 4x 16GB V100 or 2x 32GB V100 or 2x 40GB A100 gpus 8 | NUM_GPUS=4 9 | 10 | # chairs 11 | CHECKPOINT_DIR=checkpoints/chairs-gmflow && \ 12 | mkdir -p ${CHECKPOINT_DIR} && \ 13 | python -m torch.distributed.launch --nproc_per_node=${NUM_GPUS} --master_port=9989 main.py \ 14 | --launcher pytorch \ 15 | --checkpoint_dir ${CHECKPOINT_DIR} \ 16 | --batch_size 16 \ 17 | --val_dataset chairs sintel kitti \ 18 | --lr 4e-4 \ 19 | --image_size 384 512 \ 20 | --padding_factor 16 \ 21 | --upsample_factor 8 \ 22 | --with_speed_metric \ 23 | --val_freq 10000 \ 24 | --save_ckpt_freq 10000 \ 25 | --num_steps 100000 \ 26 | 2>&1 | tee -a ${CHECKPOINT_DIR}/train.log 27 | 28 | # things (our final model is trained for 800K iterations, for ablation study, you can train for 200K) 29 | CHECKPOINT_DIR=checkpoints/things-gmflow && \ 30 | mkdir -p ${CHECKPOINT_DIR} && \ 31 | python -m torch.distributed.launch --nproc_per_node=${NUM_GPUS} --master_port=9989 main.py \ 32 | --launcher pytorch \ 33 | --checkpoint_dir ${CHECKPOINT_DIR} \ 34 | --resume checkpoints/chairs-gmflow/step_100000.pth \ 35 | --stage things \ 36 | --batch_size 8 \ 37 | --val_dataset things sintel kitti \ 38 | --lr 2e-4 \ 39 | --image_size 384 768 \ 40 | --padding_factor 16 \ 41 | --upsample_factor 8 \ 42 | --with_speed_metric \ 43 | --val_freq 40000 \ 44 | --save_ckpt_freq 50000 \ 45 | --num_steps 800000 \ 46 | 2>&1 | tee -a ${CHECKPOINT_DIR}/train.log 47 | 48 | # sintel 49 | CHECKPOINT_DIR=checkpoints/sintel-gmflow && \ 50 
| mkdir -p ${CHECKPOINT_DIR} && \ 51 | python -m torch.distributed.launch --nproc_per_node=${NUM_GPUS} --master_port=9989 main.py \ 52 | --launcher pytorch \ 53 | --checkpoint_dir ${CHECKPOINT_DIR} \ 54 | --resume checkpoints/things-gmflow/step_800000.pth \ 55 | --stage sintel \ 56 | --batch_size 8 \ 57 | --val_dataset sintel kitti \ 58 | --lr 2e-4 \ 59 | --image_size 320 896 \ 60 | --padding_factor 16 \ 61 | --upsample_factor 8 \ 62 | --with_speed_metric \ 63 | --val_freq 20000 \ 64 | --save_ckpt_freq 20000 \ 65 | --num_steps 200000 \ 66 | 2>&1 | tee -a ${CHECKPOINT_DIR}/train.log 67 | 68 | # kitti 69 | CHECKPOINT_DIR=checkpoints/kitti-gmflow && \ 70 | mkdir -p ${CHECKPOINT_DIR} && \ 71 | python -m torch.distributed.launch --nproc_per_node=${NUM_GPUS} --master_port=9989 main.py \ 72 | --launcher pytorch \ 73 | --checkpoint_dir ${CHECKPOINT_DIR} \ 74 | --resume checkpoints/sintel-gmflow/step_200000.pth \ 75 | --stage kitti \ 76 | --batch_size 8 \ 77 | --val_dataset kitti \ 78 | --lr 2e-4 \ 79 | --image_size 320 1152 \ 80 | --padding_factor 16 \ 81 | --upsample_factor 8 \ 82 | --with_speed_metric \ 83 | --val_freq 10000 \ 84 | --save_ckpt_freq 10000 \ 85 | --num_steps 100000 \ 86 | 2>&1 | tee -a ${CHECKPOINT_DIR}/train.log 87 | 88 | 89 | # a final note: if your training is terminated unexpectedly, you can resume from the latest checkpoint 90 | # an example: resume chairs training 91 | # CHECKPOINT_DIR=checkpoints/chairs-gmflow && \ 92 | # mkdir -p ${CHECKPOINT_DIR} && \ 93 | # python -m torch.distributed.launch --nproc_per_node=${NUM_GPUS} --master_port=9989 main.py \ 94 | # --launcher pytorch \ 95 | # --checkpoint_dir ${CHECKPOINT_DIR} \ 96 | # --resume checkpoints/chairs-gmflow/checkpoint_latest.pth \ 97 | # --batch_size 16 \ 98 | # --val_dataset chairs sintel kitti \ 99 | # --lr 4e-4 \ 100 | # --image_size 384 512 \ 101 | # --padding_factor 16 \ 102 | # --upsample_factor 8 \ 103 | # --with_speed_metric \ 104 | # --val_freq 10000 \ 105 | # --save_ckpt_freq 10000 \ 106 | # --num_steps 100000 \ 107 | # 2>&1 | tee -a ${CHECKPOINT_DIR}/train.log 108 | 109 | -------------------------------------------------------------------------------- /egomono4d/repo/gmflow/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | -------------------------------------------------------------------------------- /egomono4d/tracking/track_predictor_cotracker.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | # from typing import Literal 3 | from typing_extensions import Literal 4 | import pdb 5 | import os 6 | 7 | import torch 8 | import torch.nn.functional as F 9 | from einops import rearrange 10 | from jaxtyping import Float 11 | from torch import Tensor 12 | 13 | from .track_predictor import TrackPredictor, Tracks, sample_image_grid_tracker 14 | import cotracker 15 | from cotracker.predictor import CoTrackerPredictor 16 | 17 | 18 | @dataclass 19 | class TrackPredictorCoTrackerCfg: 20 | name: Literal["cotracker"] 21 | grid_size: int 22 | similarity_threshold: float 23 | cache_dir: str | None 24 | # cache_path: str | None 25 | 26 | 27 | class TrackPredictorCoTracker(TrackPredictor[TrackPredictorCoTrackerCfg]): 28 | def __init__(self, cfg: TrackPredictorCoTrackerCfg) -> None: 29 | super().__init__(cfg) 30 | self.cache_dir = cfg.cache_dir 31 | # checkpoint = "scaled_offline.pth" 32 | checkpoint = "cotracker2.pth" 33 | self.tracker = CoTrackerPredictor(checkpoint=cfg.cache_dir+"/cotracker_checkpoints/"+checkpoint) 34 | grid_size = self.cfg.grid_size 35 | self.grid_queries = sample_image_grid_tracker((grid_size, grid_size))[None] 36 | self.grid_queries_init = False 37 | 38 | 39 | def calc_tracking( 40 | self, 41 | videos: Float[Tensor, "batch frame 3 height width"], 42 | query_frame: int, 43 | backward_tracking: bool=True 44 | ) -> Tracks: 45 | 46 | # (Michael) Ensuring that the coordinates of tracking points is INT for loss_tracking_robust. 47 | b, _, _, h, w = videos.shape 48 | if self.grid_queries_init is False: 49 | gs = self.grid_queries.clone() 50 | gs[..., 0] = gs[..., 0] * (w - 1) 51 | gs[..., 1] = gs[..., 1] * (h - 1) 52 | gs = torch.round(gs).to(videos.device) 53 | self.grid_queries = gs.reshape(1, -1, 2) 54 | self.grid_queries_init = True 55 | 56 | queries = torch.cat([torch.zeros_like(self.grid_queries[:, :, :1], device=videos.device) * query_frame, self.grid_queries], dim=-1) 57 | 58 | # pdb.set_trace() 59 | xy, visibility = self.tracker(videos*255, queries=queries.repeat(b, 1, 1), grid_query_frame=query_frame, backward_tracking=backward_tracking) 60 | xy, visibility = self.tracker( 61 | videos * 255, 62 | queries=queries.repeat(b, 1, 1), 63 | # grid_size=self.cfg.grid_size, 64 | grid_query_frame=query_frame, 65 | backward_tracking=backward_tracking, 66 | ) 67 | 68 | # Normalize the coordinates. 69 | b, f, _, h, w = videos.shape 70 | wh = torch.tensor((w-1, h-1), dtype=torch.float32, device=videos.device) 71 | xy = xy / wh 72 | 73 | # Filter visibility based on RGB values. 
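# Bilinearly sample the video at every predicted track location, compare each frame's
# colour to the colour seen at the query frame, and mark a track as occluded once the
# per-point colour difference exceeds cfg.similarity_threshold.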
74 | rgb = F.grid_sample( 75 | rearrange(videos, "b f c h w -> (b f) c h w"), 76 | rearrange(xy, "b f p xy -> (b f) p () xy"), 77 | mode="bilinear", 78 | padding_mode="zeros", 79 | align_corners=False, 80 | ) 81 | rgb = rearrange(rgb, "(b f) c p () -> b f p c", b=b, f=f) 82 | rgb_delta = (rgb[:, [query_frame]] - rgb).abs().norm(dim=-1) 83 | visibility = visibility & (rgb_delta < self.cfg.similarity_threshold) 84 | 85 | return Tracks(xy, visibility, 0) 86 | 87 | def forward( 88 | self, 89 | videos: Float[Tensor, "batch frame 3 height width"], 90 | query_frame: int, 91 | ) -> Tracks: 92 | 93 | if query_frame > 1: 94 | return self.calc_tracking(videos, query_frame, backward_tracking=True) 95 | elif query_frame == 0: 96 | return self.calc_tracking(videos, query_frame, backward_tracking=False) 97 | else: 98 | raise ValueError(f"Unsupport query_frame for co-trackerr, query_frame={query_frame}") 99 | 100 | -------------------------------------------------------------------------------- /egomono4d/misc/data_util.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, replace 2 | 3 | import torch.nn.functional as F 4 | from einops import rearrange 5 | from jaxtyping import Float 6 | from PIL import Image 7 | import numpy as np 8 | import pdb 9 | import os 10 | import torch 11 | from torch import Tensor 12 | from typing import Union 13 | 14 | try: 15 | EVAL = os.environ['EVAL_MODE'] 16 | except: 17 | EVAL = 'False' 18 | if EVAL not in ['True']: 19 | import open3d as o3d 20 | 21 | 22 | @dataclass 23 | class PreProcessingCfg: 24 | resize_shape: Union[tuple, int] = (300, 400) 25 | patch_size: int = 14 26 | num_frames: int = 4 27 | 28 | 29 | def compute_patch_cropped_shape( 30 | shape: tuple, 31 | patch_size: int, 32 | ) -> tuple: 33 | h, w = shape 34 | 35 | h_new = (h // patch_size) * patch_size 36 | w_new = (w // patch_size) * patch_size 37 | return h_new, w_new 38 | 39 | 40 | def pil_resize_to_center_crop( 41 | image: Image.Image, 42 | resize_shape: tuple, 43 | cropped_shape: tuple, 44 | depth_process=False 45 | ): # -> tuple[ 46 | # Image.Image, # the image itself 47 | # tuple[int, int], # image shape after scaling, before cropping 48 | 49 | w_old, h_old = image.size 50 | h_new, w_new = resize_shape 51 | h_crp, w_crp = cropped_shape 52 | 53 | # Figure out the scale factor needed to cover the desired shape with a uniformly 54 | # scaled version of the input image. Then, resize the input image. 55 | scale_factor = max(h_new / h_old, w_new / w_old) 56 | h_scaled = round(h_old * scale_factor) 57 | w_scaled = round(w_old * scale_factor) 58 | if depth_process is True: 59 | image_scaled = image.resize((w_scaled, h_scaled), Image.NEAREST) 60 | else: 61 | image_scaled = image.resize((w_scaled, h_scaled), Image.LANCZOS) 62 | 63 | # Center-crop the image. 
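# Split the leftover margin evenly to centre the (h_crp, w_crp) window; this assumes the
# crop is no larger than the scaled image, which holds whenever cropped_shape fits inside
# resize_shape, since the scale factor above takes the max over both axes.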
64 | x = (w_scaled - w_crp) // 2 65 | y = (h_scaled - h_crp) // 2 66 | image_cropped = image_scaled.crop((x, y, x + w_crp, y + h_crp)) 67 | return image_cropped, (h_scaled, w_scaled) 68 | 69 | 70 | def resize_crop_intrinisic( 71 | intrinsics: Float[Tensor, "*#batch 3 3"], 72 | origin_shape: tuple, 73 | scaled_shape: tuple, 74 | croped_shape: tuple 75 | ): 76 | h_old, w_old = origin_shape 77 | h_scl, w_scl = scaled_shape 78 | h_new, w_new = croped_shape 79 | 80 | # reshape updatation 81 | sx = w_scl / w_old 82 | sy = h_scl / h_old 83 | new_intrinsics = intrinsics.clone() 84 | new_intrinsics[..., 0, 0] *= sx 85 | new_intrinsics[..., 0, 2] *= sx 86 | new_intrinsics[..., 1, 1] *= sy 87 | new_intrinsics[..., 1, 2] *= sy 88 | 89 | # center_crop updataion 90 | offset_x = (w_scl - w_new) / 2 91 | offset_y = (h_scl - h_new) / 2 92 | new_intrinsics[0, 2] -= offset_x 93 | new_intrinsics[1, 2] -= offset_y 94 | 95 | return new_intrinsics 96 | 97 | 98 | def canonicalize_intrinisic( 99 | intrinsics: Float[Tensor, "*#batch 3 3"], 100 | shape: tuple 101 | ): 102 | # NOTE: (michael) Intrinsic Canonicalization to (1,1) size space for mixture dataset training. 103 | h, w = shape 104 | new_intrinsics = intrinsics.clone() 105 | new_intrinsics[..., 0, 0] = new_intrinsics[..., 0, 0] / w 106 | new_intrinsics[..., 0, 2] = new_intrinsics[..., 0, 2] / w 107 | new_intrinsics[..., 1, 1] = new_intrinsics[..., 1, 1] / h 108 | new_intrinsics[..., 1, 2] = new_intrinsics[..., 1, 2] / h 109 | return new_intrinsics 110 | 111 | 112 | def visualize_pcd_from_rgbd_fp(rgb_fp, depth_fp, intrinsic): 113 | color = o3d.io.read_image(rgb_fp) 114 | depth = o3d.io.read_image(depth_fp) 115 | rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth( 116 | color, depth, convert_rgb_to_intensity=False) 117 | camera = o3d.camera.PinholeCameraIntrinsic() 118 | H, W, _ = np.asarray(color).shape 119 | camera.set_intrinsics(W, H, intrinsic[0,0], intrinsic[1,1], intrinsic[0,2], intrinsic[1,2]) 120 | pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera) 121 | voxel_down_pcd = pcd.voxel_down_sample(voxel_size=0.025) 122 | return voxel_down_pcd -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .DS_Store 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | cache/* 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # poetry 101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 102 | # This is especially recommended for binary packages to ensure reproducibility, and is more 103 | # commonly ignored for libraries. 104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 105 | #poetry.lock 106 | 107 | # pdm 108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 109 | #pdm.lock 110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 111 | # in version control. 112 | # https://pdm.fming.dev/#use-with-ide 113 | .pdm.toml 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
163 | #.idea/ 164 | 165 | 166 | outputs 167 | datasets 168 | wandb 169 | checkpoints 170 | *.ckpt 171 | *.pt 172 | figures 173 | tables 174 | results 175 | events.out.tfevents* 176 | *.ply 177 | *.mp4 178 | *.pkl 179 | 180 | *.npy 181 | *.json 182 | *.tar.gz 183 | 184 | # cache/cotracker_checkpoints/* 185 | # cache/data_custom/* 186 | # cache/ego_hos_checkpoints/* 187 | # cache/gmflow_checkpoints/* 188 | # cache/models/* 189 | # cache/original_datasets/* 190 | # cache/processed_datasets/* 191 | # cache/unidepth_v2_checkpoints/* 192 | --------------------------------------------------------------------------------